diff --git a/.config/nextest.toml b/.config/nextest.toml index 3542bba5f5e8..e107857a3511 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -4,4 +4,4 @@ slow-timeout = { period = "30s", terminate-after = 4 } [[profile.default.overrides]] filter = "test(general_state_tests)" -slow-timeout = { period = "1m", terminate-after = 4 } +slow-timeout = { period = "1m", terminate-after = 10 } diff --git a/CODEOWNERS b/CODEOWNERS index 22cc62778ddb..7ec66f759789 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -19,5 +19,5 @@ crates/metrics @onbjerg crates/tracing @onbjerg crates/tasks @mattsse crates/prune @shekhirin @joshieDo -crates/snapshot @joshieDo +crates/static-file @joshieDo @shekhirin .github/ @onbjerg @gakonst @DaniPopes diff --git a/Cargo.lock b/Cargo.lock index f14edd0cfbc0..dd770911df3d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2301,6 +2301,7 @@ name = "ef-tests" version = "0.1.0-alpha.21" dependencies = [ "alloy-rlp", + "rayon", "reth-db", "reth-interfaces", "reth-node-ethereum", @@ -2727,6 +2728,7 @@ dependencies = [ "async-trait", "eyre", "futures", + "jemallocator", "reth-beacon-consensus", "reth-blockchain-tree", "reth-db", @@ -5690,6 +5692,12 @@ dependencies = [ "quick-error", ] +[[package]] +name = "retain_mut" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086" + [[package]] name = "reth" version = "0.1.0-alpha.21" @@ -5755,8 +5763,8 @@ dependencies = [ "reth-rpc-engine-api", "reth-rpc-types", "reth-rpc-types-compat", - "reth-snapshot", "reth-stages", + "reth-static-file", "reth-tasks", "reth-tracing", "reth-transaction-pool", @@ -5844,12 +5852,13 @@ dependencies = [ "reth-revm", "reth-rpc-types", "reth-rpc-types-compat", - "reth-snapshot", "reth-stages", + "reth-static-file", "reth-tasks", "reth-tokio-util", "reth-tracing", "schnellru", + "tempfile", "thiserror", "tokio", "tokio-stream", @@ -6143,6 +6152,16 @@ dependencies = [ "tracing", ] +[[package]] +name = "reth-etl" +version = "0.1.0-alpha.21" +dependencies = [ + "rayon", + "reth-db", + "reth-primitives", + "tempfile", +] + [[package]] name = "reth-interfaces" version = "0.1.0-alpha.21" @@ -6344,8 +6363,9 @@ dependencies = [ "memmap2 0.7.1", "ph", "rand 0.8.5", + "reth-primitives", "serde", - "sucds 0.8.1", + "sucds", "tempfile", "thiserror", "tracing", @@ -6388,8 +6408,8 @@ dependencies = [ "reth-revm", "reth-rpc", "reth-rpc-engine-api", - "reth-snapshot", "reth-stages", + "reth-static-file", "reth-tasks", "reth-tracing", "reth-transaction-pool", @@ -6405,6 +6425,7 @@ dependencies = [ "assert_matches", "clap", "const-str", + "derive_more", "dirs-next", "eyre", "futures", @@ -6449,8 +6470,8 @@ dependencies = [ "reth-rpc-engine-api", "reth-rpc-types", "reth-rpc-types-compat", - "reth-snapshot", "reth-stages", + "reth-static-file", "reth-tasks", "reth-tracing", "reth-transaction-pool", @@ -6571,6 +6592,7 @@ dependencies = [ "alloy-primitives", "alloy-rlp", "alloy-trie", + "anyhow", "arbitrary", "assert_matches", "byteorder", @@ -6598,12 +6620,13 @@ dependencies = [ "reth-rpc-types", "revm", "revm-primitives", + "roaring", "secp256k1 0.27.0", "serde", "serde_json", "sha2", "strum 0.26.1", - "sucds 0.6.0", + "sucds", "tempfile", "test-fuzz", "thiserror", @@ -6627,6 +6650,7 @@ dependencies = [ "pin-project", "rand 0.8.5", "rayon", + "reth-codecs", "reth-db", "reth-interfaces", "reth-metrics", @@ -6656,8 +6680,8 @@ dependencies = [ "reth-metrics", "reth-primitives", "reth-provider", - "reth-snapshot", 
"reth-stages", + "reth-static-file", "reth-tokio-util", "thiserror", "tokio", @@ -6867,24 +6891,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "reth-snapshot" -version = "0.1.0-alpha.21" -dependencies = [ - "assert_matches", - "clap", - "reth-db", - "reth-interfaces", - "reth-nippy-jar", - "reth-primitives", - "reth-provider", - "reth-stages", - "tempfile", - "thiserror", - "tokio", - "tracing", -] - [[package]] name = "reth-stages" version = "0.1.0-alpha.21" @@ -6908,6 +6914,7 @@ dependencies = [ "reth-db", "reth-downloaders", "reth-eth-wire", + "reth-etl", "reth-interfaces", "reth-metrics", "reth-node-ethereum", @@ -6915,11 +6922,34 @@ dependencies = [ "reth-primitives", "reth-provider", "reth-revm", + "reth-static-file", "reth-tokio-util", "reth-trie", "revm", "serde", "serde_json", + "tempfile", + "thiserror", + "tokio", + "tokio-stream", + "tracing", +] + +[[package]] +name = "reth-static-file" +version = "0.1.0-alpha.21" +dependencies = [ + "assert_matches", + "clap", + "rayon", + "reth-db", + "reth-interfaces", + "reth-nippy-jar", + "reth-primitives", + "reth-provider", + "reth-stages", + "reth-tokio-util", + "tempfile", "thiserror", "tokio", "tokio-stream", @@ -7194,6 +7224,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "roaring" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6106b5cf8587f5834158895e9715a3c6c9716c8aefab57f1f7680917191c7873" +dependencies = [ + "bytemuck", + "byteorder", + "retain_mut", +] + [[package]] name = "rolling-file" version = "0.2.0" @@ -8068,15 +8109,6 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" -[[package]] -name = "sucds" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64accd20141dfbef67ad83c51d588146cff7810616e1bda35a975be369059533" -dependencies = [ - "anyhow", -] - [[package]] name = "sucds" version = "0.8.1" diff --git a/Cargo.toml b/Cargo.toml index db425fd72dd5..c8488c6d1098 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ members = [ "crates/consensus/beacon-core/", "crates/consensus/common/", "crates/ethereum-forks/", + "crates/etl", "crates/interfaces/", "crates/metrics/", "crates/metrics/metrics-derive/", @@ -41,8 +42,8 @@ members = [ "crates/node-optimism/", "crates/node-core/", "crates/node-api/", - "crates/snapshot/", "crates/stages/", + "crates/static-file/", "crates/storage/codecs/", "crates/storage/codecs/derive/", "crates/storage/db/", @@ -137,6 +138,7 @@ reth-ecies = { path = "crates/net/ecies" } reth-eth-wire = { path = "crates/net/eth-wire" } reth-ethereum-forks = { path = "crates/ethereum-forks" } reth-ethereum-payload-builder = { path = "crates/payload/ethereum" } +reth-etl = { path = "crates/etl" } reth-optimism-payload-builder = { path = "crates/payload/optimism" } reth-interfaces = { path = "crates/interfaces" } reth-ipc = { path = "crates/rpc/ipc" } @@ -162,8 +164,8 @@ reth-rpc-builder = { path = "crates/rpc/rpc-builder" } reth-rpc-engine-api = { path = "crates/rpc/rpc-engine-api" } reth-rpc-types = { path = "crates/rpc/rpc-types" } reth-rpc-types-compat = { path = "crates/rpc/rpc-types-compat" } -reth-snapshot = { path = "crates/snapshot" } reth-stages = { path = "crates/stages" } +reth-static-file = { path = "crates/static-file" } reth-tasks = { path = "crates/tasks" } reth-tokio-util = { path = "crates/tokio-util" } reth-tracing = { path = "crates/tracing" } diff --git 
a/README.md b/README.md index d74df398401c..46b03ccb239c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# reth +# reth [![CI status](https://github.com/paradigmxyz/reth/workflows/ci/badge.svg)][gh-ci] [![cargo-deny status](https://github.com/paradigmxyz/reth/workflows/deny/badge.svg)][gh-deny] @@ -14,7 +14,7 @@ | [Developer Docs](./docs) | [Crate Docs](https://paradigmxyz.github.io/reth/docs) -*The project is still work in progress, see the [disclaimer below](#status).* +_The project is still work in progress, see the [disclaimer below](#status)._ [codecov]: https://app.codecov.io/gh/paradigmxyz/reth [gh-ci]: https://github.com/paradigmxyz/reth/actions/workflows/ci.yml @@ -50,6 +50,14 @@ We will be updating the documentation with the completion status of each compone We appreciate your patience until we get there. Until then, we are happy to answer all questions in the Telegram link above. +### Database compatibility + +Reth [v0.2.0-beta.1](https://github.com/paradigmxyz/reth/releases/tag/v0.2.0-beta.1) includes +a [set of breaking database changes](https://github.com/paradigmxyz/reth/pull/5191) that makes it impossible to use database files produced by earlier versions. + +If you had a database produced by alpha versions of Reth, you need to drop it with `reth db drop` +(using the same arguments such as `--config` or `--datadir` that you passed to `reth node`), and resync using the same `reth node` command you've used before. + ## For Users See the [Reth Book](https://paradigmxyz.github.io/reth) for instructions on how to install and run Reth. @@ -105,7 +113,7 @@ cargo test --workspace --features geth-tests # With Ethereum Foundation tests # # Note: Requires cloning https://github.com/ethereum/tests -# +# # cd testing/ef-tests && git clone https://github.com/ethereum/tests ethereum-tests cargo test -p ef-tests --features ef-tests ``` @@ -113,7 +121,7 @@ cargo test -p ef-tests --features ef-tests We recommend using [`cargo nextest`](https://nexte.st/) to speed up testing. With nextest installed, simply substitute `cargo test` with `cargo nextest run`. > **Note** -> +> > Some tests use random number generators to generate test data. If you want to use a deterministic seed, you can set the `SEED` environment variable. ## Getting Help @@ -135,9 +143,10 @@ See [`SECURITY.md`](./SECURITY.md). Reth is a new implementation of the Ethereum protocol. In the process of developing the node we investigated the design decisions other nodes have made to understand what is done well, what is not, and where we can improve the status quo. None of this would have been possible without them, so big shoutout to the teams below: -* [Geth](https://github.com/ethereum/go-ethereum/): We would like to express our heartfelt gratitude to the go-ethereum team for their outstanding contributions to Ethereum over the years. Their tireless efforts and dedication have helped to shape the Ethereum ecosystem and make it the vibrant and innovative community it is today. Thank you for your hard work and commitment to the project. -* [Erigon](https://github.com/ledgerwatch/erigon) (fka Turbo-Geth): Erigon pioneered the ["Staged Sync" architecture](https://erigon.substack.com/p/erigon-stage-sync-and-control-flows) that Reth is using, as well as [introduced MDBX](https://github.com/ledgerwatch/erigon/wiki/Choice-of-storage-engine) as the database of choice. We thank Erigon for pushing the state of the art research on the performance limits of Ethereum nodes. 
-* [Akula](https://github.com/akula-bft/akula/): Reth uses forks of the Apache versions of Akula's [MDBX Bindings](https://github.com/paradigmxyz/reth/pull/132), [FastRLP](https://github.com/paradigmxyz/reth/pull/63) and [ECIES](https://github.com/paradigmxyz/reth/pull/80) . Given that these packages were already released under the Apache License, and they implement standardized solutions, we decided not to reimplement them to iterate faster. We thank the Akula team for their contributions to the Rust Ethereum ecosystem and for publishing these packages. + +- [Geth](https://github.com/ethereum/go-ethereum/): We would like to express our heartfelt gratitude to the go-ethereum team for their outstanding contributions to Ethereum over the years. Their tireless efforts and dedication have helped to shape the Ethereum ecosystem and make it the vibrant and innovative community it is today. Thank you for your hard work and commitment to the project. +- [Erigon](https://github.com/ledgerwatch/erigon) (fka Turbo-Geth): Erigon pioneered the ["Staged Sync" architecture](https://erigon.substack.com/p/erigon-stage-sync-and-control-flows) that Reth is using, as well as [introduced MDBX](https://github.com/ledgerwatch/erigon/wiki/Choice-of-storage-engine) as the database of choice. We thank Erigon for pushing the state of the art research on the performance limits of Ethereum nodes. +- [Akula](https://github.com/akula-bft/akula/): Reth uses forks of the Apache versions of Akula's [MDBX Bindings](https://github.com/paradigmxyz/reth/pull/132), [FastRLP](https://github.com/paradigmxyz/reth/pull/63) and [ECIES](https://github.com/paradigmxyz/reth/pull/80) . Given that these packages were already released under the Apache License, and they implement standardized solutions, we decided not to reimplement them to iterate faster. We thank the Akula team for their contributions to the Rust Ethereum ecosystem and for publishing these packages. 
[book]: https://paradigmxyz.github.io/reth/ [tg-url]: https://t.me/paradigm_reth diff --git a/bin/reth/Cargo.toml b/bin/reth/Cargo.toml index 1d1d4639ff7d..afc71932e235 100644 --- a/bin/reth/Cargo.toml +++ b/bin/reth/Cargo.toml @@ -50,7 +50,7 @@ reth-payload-validator.workspace = true reth-basic-payload-builder.workspace = true reth-discv4.workspace = true reth-prune.workspace = true -reth-snapshot = { workspace = true, features = ["clap"] } +reth-static-file = { workspace = true, features = ["clap"] } reth-trie.workspace = true reth-nippy-jar.workspace = true reth-node-api.workspace = true diff --git a/bin/reth/src/builder.rs b/bin/reth/src/builder.rs index dc75a679c17d..d6f18c5039bb 100644 --- a/bin/reth/src/builder.rs +++ b/bin/reth/src/builder.rs @@ -6,7 +6,7 @@ use fdlimit::raise_fd_limit; use futures::{future::Either, stream, stream_select, StreamExt}; use reth_auto_seal_consensus::AutoSealBuilder; use reth_beacon_consensus::{ - hooks::{EngineHooks, PruneHook}, + hooks::{EngineHooks, PruneHook, StaticFileHook}, BeaconConsensusEngine, MIN_BLOCKS_FOR_PIPELINE_RUN, }; use reth_blockchain_tree::{config::BlockchainTreeConfig, ShareableBlockchainTree}; @@ -40,6 +40,7 @@ use reth_primitives::format_ether; use reth_provider::{providers::BlockchainProvider, ProviderFactory}; use reth_prune::PrunerBuilder; use reth_rpc_engine_api::EngineApi; +use reth_static_file::StaticFileProducer; use reth_tasks::{TaskExecutor, TaskManager}; use reth_transaction_pool::TransactionPool; use std::{path::PathBuf, sync::Arc}; @@ -127,26 +128,18 @@ impl NodeBuilderWit let prometheus_handle = self.config.install_prometheus_recorder()?; - let mut provider_factory = - ProviderFactory::new(Arc::clone(&self.db), Arc::clone(&self.config.chain)); - - // configure snapshotter - let snapshotter = reth_snapshot::Snapshotter::new( - provider_factory.clone(), - self.data_dir.snapshots_path(), - self.config.chain.snapshot_block_interval, - )?; - - provider_factory = provider_factory.with_snapshots( - self.data_dir.snapshots_path(), - snapshotter.highest_snapshot_receiver(), - )?; + let provider_factory = ProviderFactory::new( + Arc::clone(&self.db), + Arc::clone(&self.config.chain), + self.data_dir.static_files_path(), + )? 
+ .with_static_files_metrics(); self.config.start_metrics_endpoint(prometheus_handle, Arc::clone(&self.db)).await?; debug!(target: "reth::cli", chain=%self.config.chain.chain, genesis=?self.config.chain.genesis_hash(), "Initializing genesis"); - let genesis_hash = init_genesis(Arc::clone(&self.db), self.config.chain.clone())?; + let genesis_hash = init_genesis(provider_factory.clone())?; info!(target: "reth::cli", "{}", self.config.chain.display_hardforks()); @@ -270,6 +263,17 @@ impl NodeBuilderWit }; let max_block = self.config.max_block(&network_client, provider_factory.clone()).await?; + let mut hooks = EngineHooks::new(); + + let mut static_file_producer = StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + prune_config.clone().unwrap_or_default().segments, + ); + let static_file_producer_events = static_file_producer.events(); + hooks.add(StaticFileHook::new(static_file_producer.clone(), Box::new(executor.clone()))); + info!(target: "reth::cli", "StaticFileProducer initialized"); + // Configure the pipeline let (mut pipeline, client) = if self.config.dev.dev { info!(target: "reth::cli", "Starting Reth in dev mode"); @@ -301,6 +305,7 @@ impl NodeBuilderWit sync_metrics_tx, prune_config.clone(), max_block, + static_file_producer, evm_config, ) .await?; @@ -323,6 +328,7 @@ impl NodeBuilderWit sync_metrics_tx, prune_config.clone(), max_block, + static_file_producer, evm_config, ) .await?; @@ -333,22 +339,16 @@ impl NodeBuilderWit let pipeline_events = pipeline.events(); let initial_target = self.config.initial_pipeline_target(genesis_hash); - let mut hooks = EngineHooks::new(); - let pruner_events = if let Some(prune_config) = prune_config { - let mut pruner = PrunerBuilder::new(prune_config.clone()) - .max_reorg_depth(tree_config.max_reorg_depth() as usize) - .prune_delete_limit(self.config.chain.prune_delete_limit) - .build(provider_factory, snapshotter.highest_snapshot_receiver()); + let prune_config = prune_config.unwrap_or_default(); + let mut pruner = PrunerBuilder::new(prune_config.clone()) + .max_reorg_depth(tree_config.max_reorg_depth() as usize) + .prune_delete_limit(self.config.chain.prune_delete_limit) + .build(provider_factory.clone()); - let events = pruner.events(); - hooks.add(PruneHook::new(pruner, Box::new(executor.clone()))); - - info!(target: "reth::cli", ?prune_config, "Pruner initialized"); - Either::Left(events) - } else { - Either::Right(stream::empty()) - }; + let pruner_events = pruner.events(); + hooks.add(PruneHook::new(pruner, Box::new(executor.clone()))); + info!(target: "reth::cli", ?prune_config, "Pruner initialized"); // Configure the consensus engine let (beacon_consensus_engine, beacon_engine_handle) = BeaconConsensusEngine::with_channel( @@ -380,7 +380,8 @@ impl NodeBuilderWit } else { Either::Right(stream::empty()) }, - pruner_events.map(Into::into) + pruner_events.map(Into::into), + static_file_producer_events.map(Into::into), ); executor.spawn_critical( "events task", diff --git a/bin/reth/src/commands/db/clear.rs b/bin/reth/src/commands/db/clear.rs index e26f60631afe..a7c32cac1b4e 100644 --- a/bin/reth/src/commands/db/clear.rs +++ b/bin/reth/src/commands/db/clear.rs @@ -1,27 +1,53 @@ -use clap::Parser; +use clap::{Parser, Subcommand}; use reth_db::{ database::Database, + static_file::iter_static_files, table::Table, transaction::{DbTx, DbTxMut}, TableViewer, Tables, }; +use reth_primitives::{static_file::find_fixed_range, StaticFileSegment}; +use reth_provider::ProviderFactory; /// The arguments for the 
`reth db clear` command #[derive(Parser, Debug)] pub struct Command { - /// Table name - pub table: Tables, + #[clap(subcommand)] + subcommand: Subcommands, } impl Command { /// Execute `db clear` command - pub fn execute(self, db: &DB) -> eyre::Result<()> { - self.table.view(&ClearViewer { db })?; + pub fn execute(self, provider_factory: ProviderFactory) -> eyre::Result<()> { + match self.subcommand { + Subcommands::Mdbx { table } => { + table.view(&ClearViewer { db: provider_factory.db_ref() })? + } + Subcommands::StaticFile { segment } => { + let static_file_provider = provider_factory.static_file_provider(); + let static_files = iter_static_files(static_file_provider.directory())?; + + if let Some(segment_static_files) = static_files.get(&segment) { + for (block_range, _) in segment_static_files { + static_file_provider + .delete_jar(segment, find_fixed_range(block_range.start()))?; + } + } + } + } Ok(()) } } +#[derive(Subcommand, Debug)] +enum Subcommands { + /// Deletes all database table entries + Mdbx { table: Tables }, + /// Deletes all static file segment entries + StaticFile { segment: StaticFileSegment }, +} + struct ClearViewer<'a, DB: Database> { db: &'a DB, } diff --git a/bin/reth/src/commands/db/diff.rs b/bin/reth/src/commands/db/diff.rs index 313db210884c..d36278ca1b45 100644 --- a/bin/reth/src/commands/db/diff.rs +++ b/bin/reth/src/commands/db/diff.rs @@ -6,11 +6,12 @@ use crate::{ use clap::Parser; use reth_db::{ cursor::DbCursorRO, database::Database, mdbx::DatabaseArguments, open_db_read_only, - table::Table, transaction::DbTx, AccountChangeSet, AccountHistory, AccountsTrie, + table::Table, transaction::DbTx, AccountChangeSets, AccountsHistory, AccountsTrie, BlockBodyIndices, BlockOmmers, BlockWithdrawals, Bytecodes, CanonicalHeaders, DatabaseEnv, - HashedAccount, HashedStorage, HeaderNumbers, HeaderTD, Headers, PlainAccountState, - PlainStorageState, PruneCheckpoints, Receipts, StorageChangeSet, StorageHistory, StoragesTrie, - SyncStage, SyncStageProgress, Tables, TransactionBlock, Transactions, TxHashNumber, TxSenders, + HashedAccounts, HashedStorages, HeaderNumbers, HeaderTerminalDifficulties, Headers, + PlainAccountState, PlainStorageState, PruneCheckpoints, Receipts, StageCheckpointProgresses, + StageCheckpoints, StorageChangeSets, StoragesHistory, StoragesTrie, Tables, TransactionBlocks, + TransactionHashNumbers, TransactionSenders, Transactions, }; use std::{ collections::HashMap, @@ -56,7 +57,7 @@ impl Command { /// /// The discrepancies and extra elements, along with a brief summary of the diff results are /// then written to a file in the output directory. - pub fn execute(self, tool: &DbTool<'_, DatabaseEnv>) -> eyre::Result<()> { + pub fn execute(self, tool: &DbTool) -> eyre::Result<()> { // open second db let second_db_path: PathBuf = self.secondary_datadir.join("db").into(); let second_db = open_db_read_only( @@ -70,7 +71,7 @@ impl Command { }; for table in tables { - let primary_tx = tool.db.tx()?; + let primary_tx = tool.provider_factory.db_ref().tx()?; let secondary_tx = second_db.tx()?; let output_dir = self.output.clone(); @@ -78,7 +79,9 @@ impl Command { Tables::CanonicalHeaders => { find_diffs::(primary_tx, secondary_tx, output_dir)? } - Tables::HeaderTD => find_diffs::(primary_tx, secondary_tx, output_dir)?, + Tables::HeaderTerminalDifficulties => { + find_diffs::(primary_tx, secondary_tx, output_dir)? + } Tables::HeaderNumbers => { find_diffs::(primary_tx, secondary_tx, output_dir)? 
} @@ -92,14 +95,14 @@ impl Command { Tables::BlockWithdrawals => { find_diffs::(primary_tx, secondary_tx, output_dir)? } - Tables::TransactionBlock => { - find_diffs::(primary_tx, secondary_tx, output_dir)? + Tables::TransactionBlocks => { + find_diffs::(primary_tx, secondary_tx, output_dir)? } Tables::Transactions => { find_diffs::(primary_tx, secondary_tx, output_dir)? } - Tables::TxHashNumber => { - find_diffs::(primary_tx, secondary_tx, output_dir)? + Tables::TransactionHashNumbers => { + find_diffs::(primary_tx, secondary_tx, output_dir)? } Tables::Receipts => find_diffs::(primary_tx, secondary_tx, output_dir)?, Tables::PlainAccountState => { @@ -109,23 +112,23 @@ impl Command { find_diffs::(primary_tx, secondary_tx, output_dir)? } Tables::Bytecodes => find_diffs::(primary_tx, secondary_tx, output_dir)?, - Tables::AccountHistory => { - find_diffs::(primary_tx, secondary_tx, output_dir)? + Tables::AccountsHistory => { + find_diffs::(primary_tx, secondary_tx, output_dir)? } - Tables::StorageHistory => { - find_diffs::(primary_tx, secondary_tx, output_dir)? + Tables::StoragesHistory => { + find_diffs::(primary_tx, secondary_tx, output_dir)? } - Tables::AccountChangeSet => { - find_diffs::(primary_tx, secondary_tx, output_dir)? + Tables::AccountChangeSets => { + find_diffs::(primary_tx, secondary_tx, output_dir)? } - Tables::StorageChangeSet => { - find_diffs::(primary_tx, secondary_tx, output_dir)? + Tables::StorageChangeSets => { + find_diffs::(primary_tx, secondary_tx, output_dir)? } - Tables::HashedAccount => { - find_diffs::(primary_tx, secondary_tx, output_dir)? + Tables::HashedAccounts => { + find_diffs::(primary_tx, secondary_tx, output_dir)? } - Tables::HashedStorage => { - find_diffs::(primary_tx, secondary_tx, output_dir)? + Tables::HashedStorages => { + find_diffs::(primary_tx, secondary_tx, output_dir)? } Tables::AccountsTrie => { find_diffs::(primary_tx, secondary_tx, output_dir)? @@ -133,10 +136,14 @@ impl Command { Tables::StoragesTrie => { find_diffs::(primary_tx, secondary_tx, output_dir)? } - Tables::TxSenders => find_diffs::(primary_tx, secondary_tx, output_dir)?, - Tables::SyncStage => find_diffs::(primary_tx, secondary_tx, output_dir)?, - Tables::SyncStageProgress => { - find_diffs::(primary_tx, secondary_tx, output_dir)? + Tables::TransactionSenders => { + find_diffs::(primary_tx, secondary_tx, output_dir)? + } + Tables::StageCheckpoints => { + find_diffs::(primary_tx, secondary_tx, output_dir)? + } + Tables::StageCheckpointProgresses => { + find_diffs::(primary_tx, secondary_tx, output_dir)? } Tables::PruneCheckpoints => { find_diffs::(primary_tx, secondary_tx, output_dir)? diff --git a/bin/reth/src/commands/db/get.rs b/bin/reth/src/commands/db/get.rs index dc64c08e31da..1ffe038e96a2 100644 --- a/bin/reth/src/commands/db/get.rs +++ b/bin/reth/src/commands/db/get.rs @@ -2,64 +2,152 @@ use crate::utils::DbTool; use clap::Parser; use reth_db::{ database::Database, - table::{DupSort, Table}, - RawKey, RawTable, TableViewer, Tables, + static_file::{ColumnSelectorOne, ColumnSelectorTwo, HeaderMask, ReceiptMask, TransactionMask}, + table::{Decompress, DupSort, Table}, + tables, RawKey, RawTable, Receipts, TableViewer, Transactions, }; +use reth_primitives::{BlockHash, Header, StaticFileSegment}; use tracing::error; /// The arguments for the `reth db get` command #[derive(Parser, Debug)] pub struct Command { - /// The table name - /// - /// NOTE: The dupsort tables are not supported now. 
- pub table: Tables, - - /// The key to get content for - #[arg(value_parser = maybe_json_value_parser)] - pub key: String, - - /// The subkey to get content for - #[arg(value_parser = maybe_json_value_parser)] - pub subkey: Option, - - /// Output bytes instead of human-readable decoded value - #[clap(long)] - pub raw: bool, + #[clap(subcommand)] + subcommand: Subcommand, +} + +#[derive(clap::Subcommand, Debug)] +enum Subcommand { + /// Gets the content of a database table for the given key + Mdbx { + table: tables::Tables, + + /// The key to get content for + #[arg(value_parser = maybe_json_value_parser)] + key: String, + + /// The subkey to get content for + #[arg(value_parser = maybe_json_value_parser)] + subkey: Option, + + /// Output bytes instead of human-readable decoded value + #[clap(long)] + raw: bool, + }, + /// Gets the content of a static file segment for the given key + StaticFile { + segment: StaticFileSegment, + + /// The key to get content for + #[arg(value_parser = maybe_json_value_parser)] + key: String, + + /// Output bytes instead of human-readable decoded value + #[clap(long)] + raw: bool, + }, } impl Command { /// Execute `db get` command - pub fn execute(self, tool: &DbTool<'_, DB>) -> eyre::Result<()> { - self.table.view(&GetValueViewer { tool, args: &self }) - } + pub fn execute(self, tool: &DbTool) -> eyre::Result<()> { + match self.subcommand { + Subcommand::Mdbx { table, key, subkey, raw } => { + table.view(&GetValueViewer { tool, key, subkey, raw })? + } + Subcommand::StaticFile { segment, key, raw } => { + let (key, mask): (u64, _) = match segment { + StaticFileSegment::Headers => { + (table_key::(&key)?, >::MASK) + } + StaticFileSegment::Transactions => ( + table_key::(&key)?, + ::Value>>::MASK, + ), + StaticFileSegment::Receipts => ( + table_key::(&key)?, + ::Value>>::MASK, + ), + }; - /// Get an instance of key for given table - pub fn table_key(&self) -> Result { - assert_eq!(T::TABLE, self.table); - serde_json::from_str::(&self.key).map_err(Into::into) - } + let content = tool.provider_factory.static_file_provider().find_static_file( + segment, + |provider| { + let mut cursor = provider.cursor()?; + cursor.get(key.into(), mask).map(|result| { + result.map(|vec| { + vec.iter().map(|slice| slice.to_vec()).collect::>() + }) + }) + }, + )?; - /// Get an instance of subkey for given dupsort table - fn table_subkey(&self) -> Result { - assert_eq!(T::TABLE, self.table); - serde_json::from_str::(&self.subkey.clone().unwrap_or_default()) - .map_err(Into::into) + match content { + Some(content) => { + if raw { + println!("{:?}", content); + } else { + match segment { + StaticFileSegment::Headers => { + let header = Header::decompress(content[0].as_slice())?; + let block_hash = BlockHash::decompress(content[1].as_slice())?; + println!( + "{}\n{}", + serde_json::to_string_pretty(&header)?, + serde_json::to_string_pretty(&block_hash)? 
+ ); + } + StaticFileSegment::Transactions => { + let transaction = <::Value>::decompress( + content[0].as_slice(), + )?; + println!("{}", serde_json::to_string_pretty(&transaction)?); + } + StaticFileSegment::Receipts => { + let receipt = <::Value>::decompress( + content[0].as_slice(), + )?; + println!("{}", serde_json::to_string_pretty(&receipt)?); + } + } + } + } + None => { + error!(target: "reth::cli", "No content for the given table key."); + } + }; + } + } + + Ok(()) } } +/// Get an instance of key for given table +fn table_key(key: &str) -> Result { + serde_json::from_str::(key).map_err(|e| eyre::eyre!(e)) +} + +/// Get an instance of subkey for given dupsort table +fn table_subkey(subkey: &Option) -> Result { + serde_json::from_str::(&subkey.clone().unwrap_or_default()) + .map_err(|e| eyre::eyre!(e)) +} + struct GetValueViewer<'a, DB: Database> { - tool: &'a DbTool<'a, DB>, - args: &'a Command, + tool: &'a DbTool, + key: String, + subkey: Option, + raw: bool, } impl TableViewer<()> for GetValueViewer<'_, DB> { type Error = eyre::Report; fn view(&self) -> Result<(), Self::Error> { - let key = self.args.table_key::()?; + let key = table_key::(&self.key)?; - let content = if self.args.raw { + let content = if self.raw { self.tool .get::>(RawKey::from(key))? .map(|content| format!("{:?}", content.raw_value())) @@ -81,10 +169,10 @@ impl TableViewer<()> for GetValueViewer<'_, DB> { fn view_dupsort(&self) -> Result<(), Self::Error> { // get a key for given table - let key = self.args.table_key::()?; + let key = table_key::(&self.key)?; // process dupsort table - let subkey = self.args.table_subkey::()?; + let subkey = table_subkey::(&self.subkey)?; match self.tool.get_dup::(key, subkey)? { Some(content) => { @@ -113,7 +201,7 @@ mod tests { use clap::{Args, Parser}; use reth_db::{ models::{storage_sharded_key::StorageShardedKey, ShardedKey}, - AccountHistory, HashedAccount, Headers, StorageHistory, SyncStage, + AccountsHistory, HashedAccounts, Headers, StageCheckpoints, StoragesHistory, }; use reth_primitives::{Address, B256}; use std::str::FromStr; @@ -127,17 +215,12 @@ mod tests { #[test] fn parse_numeric_key_args() { - let args = CommandParser::::parse_from(["reth", "Headers", "123"]).args; - assert_eq!(args.table_key::().unwrap(), 123); - - let args = CommandParser::::parse_from([ - "reth", - "HashedAccount", - "0x0ac361fe774b78f8fc4e86c1916930d150865c3fc2e21dca2e58833557608bac", - ]) - .args; + assert_eq!(table_key::("123").unwrap(), 123); assert_eq!( - args.table_key::().unwrap(), + table_key::( + "\"0x0ac361fe774b78f8fc4e86c1916930d150865c3fc2e21dca2e58833557608bac\"" + ) + .unwrap(), B256::from_str("0x0ac361fe774b78f8fc4e86c1916930d150865c3fc2e21dca2e58833557608bac") .unwrap() ); @@ -145,16 +228,16 @@ mod tests { #[test] fn parse_string_key_args() { - let args = - CommandParser::::parse_from(["reth", "SyncStage", "MerkleExecution"]).args; - assert_eq!(args.table_key::().unwrap(), "MerkleExecution"); + assert_eq!( + table_key::("\"MerkleExecution\"").unwrap(), + "MerkleExecution" + ); } #[test] fn parse_json_key_args() { - let args = CommandParser::::parse_from(["reth", "StorageHistory", r#"{ "address": "0x01957911244e546ce519fbac6f798958fafadb41", "sharded_key": { "key": "0x0000000000000000000000000000000000000000000000000000000000000003", "highest_block_number": 18446744073709551615 } }"#]).args; assert_eq!( - args.table_key::().unwrap(), + table_key::(r#"{ "address": "0x01957911244e546ce519fbac6f798958fafadb41", "sharded_key": { "key": 
"0x0000000000000000000000000000000000000000000000000000000000000003", "highest_block_number": 18446744073709551615 } }"#).unwrap(), StorageShardedKey::new( Address::from_str("0x01957911244e546ce519fbac6f798958fafadb41").unwrap(), B256::from_str( @@ -168,9 +251,8 @@ mod tests { #[test] fn parse_json_key_for_account_history() { - let args = CommandParser::::parse_from(["reth", "AccountHistory", r#"{ "key": "0x4448e1273fd5a8bfdb9ed111e96889c960eee145", "highest_block_number": 18446744073709551615 }"#]).args; assert_eq!( - args.table_key::().unwrap(), + table_key::(r#"{ "key": "0x4448e1273fd5a8bfdb9ed111e96889c960eee145", "highest_block_number": 18446744073709551615 }"#).unwrap(), ShardedKey::new( Address::from_str("0x4448e1273fd5a8bfdb9ed111e96889c960eee145").unwrap(), 18446744073709551615 diff --git a/bin/reth/src/commands/db/list.rs b/bin/reth/src/commands/db/list.rs index 49cb4affe9fe..bd1b6033e371 100644 --- a/bin/reth/src/commands/db/list.rs +++ b/bin/reth/src/commands/db/list.rs @@ -50,7 +50,7 @@ pub struct Command { impl Command { /// Execute `db list` command - pub fn execute(self, tool: &DbTool<'_, DatabaseEnv>) -> eyre::Result<()> { + pub fn execute(self, tool: &DbTool) -> eyre::Result<()> { self.table.view(&ListTableViewer { tool, args: &self }) } @@ -81,7 +81,7 @@ impl Command { } struct ListTableViewer<'a> { - tool: &'a DbTool<'a, DatabaseEnv>, + tool: &'a DbTool, args: &'a Command, } @@ -89,7 +89,7 @@ impl TableViewer<()> for ListTableViewer<'_> { type Error = eyre::Report; fn view(&self) -> Result<(), Self::Error> { - self.tool.db.view(|tx| { + self.tool.provider_factory.db_ref().view(|tx| { let table_db = tx.inner.open_db(Some(self.args.table.name())).wrap_err("Could not open db.")?; let stats = tx.inner.db_stat(&table_db).wrap_err(format!("Could not find table: {}", stringify!($table)))?; let total_entries = stats.entries(); diff --git a/bin/reth/src/commands/db/mod.rs b/bin/reth/src/commands/db/mod.rs index e6f199cd7a6e..92f3526d0d58 100644 --- a/bin/reth/src/commands/db/mod.rs +++ b/bin/reth/src/commands/db/mod.rs @@ -9,18 +9,13 @@ use crate::{ utils::DbTool, }; use clap::{Parser, Subcommand}; -use comfy_table::{Cell, Row, Table as ComfyTable}; -use eyre::WrapErr; -use human_bytes::human_bytes; use reth_db::{ - database::Database, - mdbx, mdbx::DatabaseArguments, open_db, open_db_read_only, version::{get_db_version, DatabaseVersionError, DB_VERSION}, - Tables, }; use reth_primitives::ChainSpec; +use reth_provider::ProviderFactory; use std::{ io::{self, Write}, sync::Arc, @@ -30,7 +25,8 @@ mod clear; mod diff; mod get; mod list; -mod snapshots; +mod static_files; +mod stats; /// DB List TUI mod tui; @@ -71,7 +67,7 @@ pub struct Command { /// `reth db` subcommands pub enum Subcommands { /// Lists all the tables, their entry count and their size - Stats, + Stats(stats::Command), /// Lists the contents of a table List(list::Command), /// Create a diff between two database tables or two entire databases. 
@@ -86,8 +82,8 @@ pub enum Subcommands { }, /// Deletes all table entries Clear(clear::Command), - /// Snapshots tables from database - Snapshot(snapshots::Command), + /// Creates static files from database tables + CreateStaticFiles(static_files::Command), /// Lists current and local database versions Version, /// Returns the full database path @@ -100,102 +96,30 @@ impl Command { // add network name to data dir let data_dir = self.datadir.unwrap_or_chain_default(self.chain.chain); let db_path = data_dir.db_path(); + let static_files_path = data_dir.static_files_path(); match self.command { // TODO: We'll need to add this on the DB trait. - Subcommands::Stats { .. } => { + Subcommands::Stats(command) => { let db = open_db_read_only( &db_path, DatabaseArguments::default().log_level(self.db.log_level), )?; - let tool = DbTool::new(&db, self.chain.clone())?; - let mut stats_table = ComfyTable::new(); - stats_table.load_preset(comfy_table::presets::ASCII_MARKDOWN); - stats_table.set_header([ - "Table Name", - "# Entries", - "Branch Pages", - "Leaf Pages", - "Overflow Pages", - "Total Size", - ]); + let provider_factory = + ProviderFactory::new(db, self.chain.clone(), static_files_path)?; - tool.db.view(|tx| { - let mut tables = - Tables::ALL.iter().map(|table| table.name()).collect::>(); - tables.sort(); - let mut total_size = 0; - for table in tables { - let table_db = - tx.inner.open_db(Some(table)).wrap_err("Could not open db.")?; - - let stats = tx - .inner - .db_stat(&table_db) - .wrap_err(format!("Could not find table: {table}"))?; - - // Defaults to 16KB right now but we should - // re-evaluate depending on the DB we end up using - // (e.g. REDB does not have these options as configurable intentionally) - let page_size = stats.page_size() as usize; - let leaf_pages = stats.leaf_pages(); - let branch_pages = stats.branch_pages(); - let overflow_pages = stats.overflow_pages(); - let num_pages = leaf_pages + branch_pages + overflow_pages; - let table_size = page_size * num_pages; - - total_size += table_size; - let mut row = Row::new(); - row.add_cell(Cell::new(table)) - .add_cell(Cell::new(stats.entries())) - .add_cell(Cell::new(branch_pages)) - .add_cell(Cell::new(leaf_pages)) - .add_cell(Cell::new(overflow_pages)) - .add_cell(Cell::new(human_bytes(table_size as f64))); - stats_table.add_row(row); - } - - let max_widths = stats_table.column_max_content_widths(); - - let mut seperator = Row::new(); - for width in max_widths { - seperator.add_cell(Cell::new("-".repeat(width as usize))); - } - stats_table.add_row(seperator); - - let mut row = Row::new(); - row.add_cell(Cell::new("Total DB size")) - .add_cell(Cell::new("")) - .add_cell(Cell::new("")) - .add_cell(Cell::new("")) - .add_cell(Cell::new("")) - .add_cell(Cell::new(human_bytes(total_size as f64))); - stats_table.add_row(row); - - let freelist = tx.inner.env().freelist()?; - let freelist_size = freelist * - tx.inner.db_stat(&mdbx::Database::freelist_db())?.page_size() as usize; - - let mut row = Row::new(); - row.add_cell(Cell::new("Freelist size")) - .add_cell(Cell::new(freelist)) - .add_cell(Cell::new("")) - .add_cell(Cell::new("")) - .add_cell(Cell::new("")) - .add_cell(Cell::new(human_bytes(freelist_size as f64))); - stats_table.add_row(row); - - Ok::<(), eyre::Report>(()) - })??; - - println!("{stats_table}"); + let tool = DbTool::new(provider_factory, self.chain.clone())?; + command.execute(data_dir, &tool)?; } Subcommands::List(command) => { let db = open_db_read_only( &db_path, 
DatabaseArguments::default().log_level(self.db.log_level), )?; - let tool = DbTool::new(&db, self.chain.clone())?; + let provider_factory = + ProviderFactory::new(db, self.chain.clone(), static_files_path)?; + + let tool = DbTool::new(provider_factory, self.chain.clone())?; command.execute(&tool)?; } Subcommands::Diff(command) => { @@ -203,7 +127,10 @@ impl Command { &db_path, DatabaseArguments::default().log_level(self.db.log_level), )?; - let tool = DbTool::new(&db, self.chain.clone())?; + let provider_factory = + ProviderFactory::new(db, self.chain.clone(), static_files_path)?; + + let tool = DbTool::new(provider_factory, self.chain.clone())?; command.execute(&tool)?; } Subcommands::Get(command) => { @@ -211,13 +138,16 @@ impl Command { &db_path, DatabaseArguments::default().log_level(self.db.log_level), )?; - let tool = DbTool::new(&db, self.chain.clone())?; + let provider_factory = + ProviderFactory::new(db, self.chain.clone(), static_files_path)?; + + let tool = DbTool::new(provider_factory, self.chain.clone())?; command.execute(&tool)?; } Subcommands::Drop { force } => { if !force { // Ask for confirmation - print!("Are you sure you want to drop the database at {db_path:?}? This cannot be undone. (y/N): "); + print!("Are you sure you want to drop the database at {data_dir}? This cannot be undone. (y/N): "); // Flush the buffer to ensure the message is printed immediately io::stdout().flush().unwrap(); @@ -232,16 +162,22 @@ impl Command { let db = open_db(&db_path, DatabaseArguments::default().log_level(self.db.log_level))?; - let mut tool = DbTool::new(&db, self.chain.clone())?; - tool.drop(db_path)?; + let provider_factory = + ProviderFactory::new(db, self.chain.clone(), static_files_path.clone())?; + + let mut tool = DbTool::new(provider_factory, self.chain.clone())?; + tool.drop(db_path, static_files_path)?; } Subcommands::Clear(command) => { let db = open_db(&db_path, DatabaseArguments::default().log_level(self.db.log_level))?; - command.execute(&db)?; + let provider_factory = + ProviderFactory::new(db, self.chain.clone(), static_files_path)?; + + command.execute(provider_factory)?; } - Subcommands::Snapshot(command) => { - command.execute(&db_path, self.db.log_level, self.chain.clone())?; + Subcommands::CreateStaticFiles(command) => { + command.execute(data_dir, self.db.log_level, self.chain.clone())?; } Subcommands::Version => { let local_db_version = match get_db_version(&db_path) { diff --git a/bin/reth/src/commands/db/snapshots/bench.rs b/bin/reth/src/commands/db/static_files/bench.rs similarity index 69% rename from bin/reth/src/commands/db/snapshots/bench.rs rename to bin/reth/src/commands/db/static_files/bench.rs index 928898205f07..d3de628e2b43 100644 --- a/bin/reth/src/commands/db/snapshots/bench.rs +++ b/bin/reth/src/commands/db/static_files/bench.rs @@ -1,7 +1,7 @@ use reth_db::DatabaseEnv; use reth_primitives::{ - snapshot::{Compression, Filters}, - ChainSpec, SnapshotSegment, + static_file::{Compression, Filters}, + StaticFileSegment, }; use reth_provider::{DatabaseProviderRO, ProviderFactory}; use std::{fmt::Debug, sync::Arc, time::Instant}; @@ -16,11 +16,11 @@ pub(crate) enum BenchKind { pub(crate) fn bench( bench_kind: BenchKind, - db: (DatabaseEnv, Arc), - segment: SnapshotSegment, + provider_factory: Arc>, + segment: StaticFileSegment, filters: Filters, compression: Compression, - mut snapshot_method: F1, + mut static_file_method: F1, database_method: F2, ) -> eyre::Result<()> where @@ -28,22 +28,19 @@ where F2: Fn(DatabaseProviderRO) -> eyre::Result, R: Debug 
+ PartialEq, { - let (db, chain) = db; - println!(); println!("############"); println!("## [{segment:?}] [{compression:?}] [{filters:?}] [{bench_kind:?}]"); - let snap_result = { + let static_file_result = { let start = Instant::now(); - let result = snapshot_method()?; + let result = static_file_method()?; let end = start.elapsed().as_micros(); - println!("# snapshot {bench_kind:?} | {end} μs"); + println!("# static file {bench_kind:?} | {end} μs"); result }; let db_result = { - let factory = ProviderFactory::new(db, chain); - let provider = factory.provider()?; + let provider = provider_factory.provider()?; let start = Instant::now(); let result = database_method(provider)?; let end = start.elapsed().as_micros(); @@ -51,7 +48,7 @@ where result }; - assert_eq!(snap_result, db_result); + assert_eq!(static_file_result, db_result); Ok(()) } diff --git a/bin/reth/src/commands/db/snapshots/headers.rs b/bin/reth/src/commands/db/static_files/headers.rs similarity index 64% rename from bin/reth/src/commands/db/snapshots/headers.rs rename to bin/reth/src/commands/db/static_files/headers.rs index 6b6f2b11904c..452063a89a19 100644 --- a/bin/reth/src/commands/db/snapshots/headers.rs +++ b/bin/reth/src/commands/db/static_files/headers.rs @@ -3,38 +3,27 @@ use super::{ Command, }; use rand::{seq::SliceRandom, Rng}; -use reth_db::{mdbx::DatabaseArguments, open_db_read_only, snapshot::HeaderMask}; -use reth_interfaces::db::LogLevel; +use reth_db::{static_file::HeaderMask, DatabaseEnv}; use reth_primitives::{ - snapshot::{Compression, Filters, InclusionFilter, PerfectHashingFunction}, - BlockHash, ChainSpec, Header, SnapshotSegment, + static_file::{Compression, Filters, InclusionFilter, PerfectHashingFunction}, + BlockHash, Header, StaticFileSegment, }; use reth_provider::{ - providers::SnapshotProvider, BlockNumReader, HeaderProvider, ProviderError, ProviderFactory, - TransactionsProviderExt, -}; -use std::{ - path::{Path, PathBuf}, - sync::Arc, + providers::StaticFileProvider, BlockNumReader, HeaderProvider, ProviderError, ProviderFactory, }; +use std::{ops::RangeInclusive, path::PathBuf, sync::Arc}; impl Command { - pub(crate) fn bench_headers_snapshot( + pub(crate) fn bench_headers_static_file( &self, - db_path: &Path, - log_level: Option, - chain: Arc, + provider_factory: Arc>, compression: Compression, inclusion_filter: InclusionFilter, phf: Option, ) -> eyre::Result<()> { - let db_args = DatabaseArguments::default().log_level(log_level); - - let factory = ProviderFactory::new(open_db_read_only(db_path, db_args)?, chain.clone()); - let provider = factory.provider()?; + let provider = provider_factory.provider()?; let tip = provider.last_block_number()?; - let block_range = - self.block_ranges(tip).first().expect("has been generated before").clone(); + let block_range = *self.block_ranges(tip).first().expect("has been generated before"); let filters = if let Some(phf) = self.with_filters.then_some(phf).flatten() { Filters::WithFilters(inclusion_filter, phf) @@ -42,19 +31,16 @@ impl Command { Filters::WithoutFilters }; - let mut row_indexes = block_range.clone().collect::>(); + let range: RangeInclusive = (&block_range).into(); + let mut row_indexes = range.collect::>(); let mut rng = rand::thread_rng(); - let tx_range = ProviderFactory::new(open_db_read_only(db_path, db_args)?, chain.clone()) - .provider()? 
- .transaction_range_by_block_range(block_range.clone())?; - - let path: PathBuf = SnapshotSegment::Headers - .filename_with_configuration(filters, compression, &block_range, &tx_range) + let path: PathBuf = StaticFileSegment::Headers + .filename_with_configuration(filters, compression, &block_range) .into(); - let provider = SnapshotProvider::new(PathBuf::default())?; + let provider = StaticFileProvider::new(PathBuf::default())?; let jar_provider = provider.get_segment_provider_from_block( - SnapshotSegment::Headers, + StaticFileSegment::Headers, self.from, Some(&path), )?; @@ -63,8 +49,8 @@ impl Command { for bench_kind in [BenchKind::Walk, BenchKind::RandomAll] { bench( bench_kind, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Headers, + provider_factory.clone(), + StaticFileSegment::Headers, filters, compression, || { @@ -94,8 +80,8 @@ impl Command { let num = row_indexes[rng.gen_range(0..row_indexes.len())]; bench( BenchKind::RandomOne, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Headers, + provider_factory.clone(), + StaticFileSegment::Headers, filters, compression, || { @@ -114,16 +100,15 @@ impl Command { // BENCHMARK QUERYING A RANDOM HEADER BY HASH { let num = row_indexes[rng.gen_range(0..row_indexes.len())] as u64; - let header_hash = - ProviderFactory::new(open_db_read_only(db_path, db_args)?, chain.clone()) - .header_by_number(num)? - .ok_or(ProviderError::HeaderNotFound(num.into()))? - .hash_slow(); + let header_hash = provider_factory + .header_by_number(num)? + .ok_or(ProviderError::HeaderNotFound(num.into()))? + .hash_slow(); bench( BenchKind::RandomHash, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Headers, + provider_factory.clone(), + StaticFileSegment::Headers, filters, compression, || { diff --git a/bin/reth/src/commands/db/snapshots/mod.rs b/bin/reth/src/commands/db/static_files/mod.rs similarity index 54% rename from bin/reth/src/commands/db/snapshots/mod.rs rename to bin/reth/src/commands/db/static_files/mod.rs index 410f77b7f7a5..7b12437ff2ef 100644 --- a/bin/reth/src/commands/db/snapshots/mod.rs +++ b/bin/reth/src/commands/db/static_files/mod.rs @@ -9,14 +9,17 @@ use reth_db::{ }; use reth_interfaces::db::LogLevel; use reth_nippy_jar::{NippyJar, NippyJarCursor}; +use reth_node_core::dirs::{ChainPath, DataDirPath}; use reth_primitives::{ - snapshot::{Compression, Filters, InclusionFilter, PerfectHashingFunction, SegmentHeader}, - BlockNumber, ChainSpec, SnapshotSegment, + static_file::{ + Compression, Filters, InclusionFilter, PerfectHashingFunction, SegmentConfig, + SegmentHeader, SegmentRangeInclusive, + }, + BlockNumber, ChainSpec, StaticFileSegment, }; -use reth_provider::{BlockNumReader, ProviderFactory, TransactionsProviderExt}; -use reth_snapshot::{segments as snap_segments, segments::Segment}; +use reth_provider::{BlockNumReader, ProviderFactory}; +use reth_static_file::{segments as static_file_segments, segments::Segment}; use std::{ - ops::RangeInclusive, path::{Path, PathBuf}, sync::Arc, time::{Duration, Instant}, @@ -28,20 +31,20 @@ mod receipts; mod transactions; #[derive(Parser, Debug)] -/// Arguments for the `reth db snapshot` command. +/// Arguments for the `reth db create-static-files` command. pub struct Command { - /// Snapshot segments to generate. - segments: Vec, + /// Static File segments to generate. + segments: Vec, - /// Starting block for the snapshot. + /// Starting block for the static file. 
#[arg(long, short, default_value = "0")] from: BlockNumber, - /// Number of blocks in the snapshot. + /// Number of blocks in the static file. #[arg(long, short, default_value = "500000")] block_interval: u64, - /// Sets the number of snapshots built in parallel. Note: Each parallel build is + /// Sets the number of static files built in parallel. Note: Each parallel build is /// memory-intensive. #[arg( long, short, @@ -50,15 +53,15 @@ pub struct Command { )] parallel: u64, - /// Flag to skip snapshot creation and print snapshot files stats. + /// Flag to skip static file creation and print static files stats. #[arg(long, default_value = "false")] only_stats: bool, - /// Flag to enable database-to-snapshot benchmarking. + /// Flag to enable database-to-static file benchmarking. #[arg(long, default_value = "false")] bench: bool, - /// Flag to skip snapshot creation and only run benchmarks on existing snapshots. + /// Flag to skip static file creation and only run benchmarks on existing static files. #[arg(long, default_value = "false")] only_bench: bool, @@ -76,30 +79,33 @@ pub struct Command { } impl Command { - /// Execute `db snapshot` command + /// Execute `db create-static-files` command pub fn execute( self, - db_path: &Path, + data_dir: ChainPath, log_level: Option, chain: Arc, ) -> eyre::Result<()> { - let all_combinations = - self.segments.iter().cartesian_product(self.compression.iter()).cartesian_product( - if self.phf.is_empty() { - vec![None] - } else { - self.phf.iter().copied().map(Some).collect::>() - }, - ); + let all_combinations = self + .segments + .iter() + .cartesian_product(self.compression.iter().copied()) + .cartesian_product(if self.phf.is_empty() { + vec![None] + } else { + self.phf.iter().copied().map(Some).collect::>() + }); + + let db = open_db_read_only( + data_dir.db_path().as_path(), + DatabaseArguments::default() + .log_level(log_level) + .max_read_transaction_duration(Some(MaxReadTransactionDuration::Unbounded)), + )?; + let provider_factory = + Arc::new(ProviderFactory::new(db, chain.clone(), data_dir.static_files_path())?); { - let db = open_db_read_only( - db_path, - DatabaseArguments::default() - .max_read_transaction_duration(Some(MaxReadTransactionDuration::Unbounded)), - )?; - let factory = Arc::new(ProviderFactory::new(db, chain.clone())); - if !self.only_bench { for ((mode, compression), phf) in all_combinations.clone() { let filters = if let Some(phf) = self.with_filters.then_some(phf).flatten() { @@ -109,17 +115,21 @@ impl Command { }; match mode { - SnapshotSegment::Headers => self.generate_snapshot::( - factory.clone(), - snap_segments::Headers::new(*compression, filters), + StaticFileSegment::Headers => self.generate_static_file::( + provider_factory.clone(), + static_file_segments::Headers, + SegmentConfig { filters, compression }, )?, - SnapshotSegment::Transactions => self.generate_snapshot::( - factory.clone(), - snap_segments::Transactions::new(*compression, filters), - )?, - SnapshotSegment::Receipts => self.generate_snapshot::( - factory.clone(), - snap_segments::Receipts::new(*compression, filters), + StaticFileSegment::Transactions => self + .generate_static_file::( + provider_factory.clone(), + static_file_segments::Transactions, + SegmentConfig { filters, compression }, + )?, + StaticFileSegment::Receipts => self.generate_static_file::( + provider_factory.clone(), + static_file_segments::Receipts, + SegmentConfig { filters, compression }, )?, } } @@ -127,29 +137,23 @@ impl Command { } if self.only_bench || self.bench { - for 
((mode, compression), phf) in all_combinations.clone() { + for ((mode, compression), phf) in all_combinations { match mode { - SnapshotSegment::Headers => self.bench_headers_snapshot( - db_path, - log_level, - chain.clone(), - *compression, + StaticFileSegment::Headers => self.bench_headers_static_file( + provider_factory.clone(), + compression, InclusionFilter::Cuckoo, phf, )?, - SnapshotSegment::Transactions => self.bench_transactions_snapshot( - db_path, - log_level, - chain.clone(), - *compression, + StaticFileSegment::Transactions => self.bench_transactions_static_file( + provider_factory.clone(), + compression, InclusionFilter::Cuckoo, phf, )?, - SnapshotSegment::Receipts => self.bench_receipts_snapshot( - db_path, - log_level, - chain.clone(), - *compression, + StaticFileSegment::Receipts => self.bench_receipts_static_file( + provider_factory.clone(), + compression, InclusionFilter::Cuckoo, phf, )?, @@ -161,30 +165,31 @@ impl Command { } /// Generates successive inclusive block ranges up to the tip starting at `self.from`. - fn block_ranges(&self, tip: BlockNumber) -> Vec> { + fn block_ranges(&self, tip: BlockNumber) -> Vec { let mut from = self.from; let mut ranges = Vec::new(); while from <= tip { let end_range = std::cmp::min(from + self.block_interval - 1, tip); - ranges.push(from..=end_range); + ranges.push(SegmentRangeInclusive::new(from, end_range)); from = end_range + 1; } ranges } - /// Generates snapshots from `self.from` with a `self.block_interval`. Generates them in + /// Generates static files from `self.from` with a `self.block_interval`. Generates them in /// parallel if specified. - fn generate_snapshot( + fn generate_static_file( &self, factory: Arc>, - segment: impl Segment + Send + Sync, + segment: impl Segment, + config: SegmentConfig, ) -> eyre::Result<()> { let dir = PathBuf::default(); let ranges = self.block_ranges(factory.best_block_number()?); - let mut created_snapshots = vec![]; + let mut created_static_files = vec![]; // Filter/PHF is memory intensive, so we have to limit the parallelism. for block_ranges in ranges.chunks(self.parallel as usize) { @@ -194,34 +199,36 @@ impl Command { let provider = factory.provider()?; if !self.only_stats { - segment.snapshot::(&provider, &dir, block_range.clone())?; + segment.create_static_file_file( + &provider, + dir.as_path(), + config, + block_range.into(), + )?; } - let tx_range = - provider.transaction_range_by_block_range(block_range.clone())?; - - Ok(segment.segment().filename(block_range, &tx_range)) + Ok(segment.segment().filename(block_range)) }) .collect::, eyre::Report>>()?; - created_snapshots.extend(created_files); + created_static_files.extend(created_files); } - self.stats(created_snapshots) + self.stats(created_static_files) } - /// Prints detailed statistics for each snapshot, including loading time. + /// Prints detailed statistics for each static file, including loading time. /// - /// This function loads each snapshot from the provided paths and prints - /// statistics about various aspects of each snapshot, such as filters size, + /// This function loads each static file from the provided paths and prints + /// statistics about various aspects of each static file, such as filters size, /// offset index size, offset list size, and loading time. 
- fn stats(&self, snapshots: Vec>) -> eyre::Result<()> { + fn stats(&self, static_files: Vec>) -> eyre::Result<()> { let mut total_filters_size = 0; let mut total_index_size = 0; let mut total_duration = Duration::new(0, 0); let mut total_file_size = 0; - for snap in &snapshots { + for snap in &static_files { let start_time = Instant::now(); let jar = NippyJar::::load(snap.as_ref())?; let _cursor = NippyJarCursor::new(&jar)?; @@ -233,7 +240,7 @@ impl Command { total_duration += duration; total_file_size += file_size; - println!("Snapshot: {:?}", snap.as_ref().file_name()); + println!("StaticFile: {:?}", snap.as_ref().file_name()); println!(" File Size: {:>7}", human_bytes(file_size as f64)); println!(" Filters Size: {:>7}", human_bytes(jar.filter_size() as f64)); println!(" Offset Index Size: {:>7}", human_bytes(jar.offsets_index_size() as f64)); @@ -244,7 +251,7 @@ impl Command { ); } - let avg_duration = total_duration / snapshots.len() as u32; + let avg_duration = total_duration / static_files.len() as u32; println!("Total Filters Size: {:>7}", human_bytes(total_filters_size as f64)); println!("Total Offset Index Size: {:>7}", human_bytes(total_index_size as f64)); diff --git a/bin/reth/src/commands/db/snapshots/receipts.rs b/bin/reth/src/commands/db/static_files/receipts.rs similarity index 68% rename from bin/reth/src/commands/db/snapshots/receipts.rs rename to bin/reth/src/commands/db/static_files/receipts.rs index 203e021d372f..ec9f580246cb 100644 --- a/bin/reth/src/commands/db/snapshots/receipts.rs +++ b/bin/reth/src/commands/db/static_files/receipts.rs @@ -3,38 +3,28 @@ use super::{ Command, Compression, PerfectHashingFunction, }; use rand::{seq::SliceRandom, Rng}; -use reth_db::{mdbx::DatabaseArguments, open_db_read_only, snapshot::ReceiptMask}; -use reth_interfaces::db::LogLevel; +use reth_db::{static_file::ReceiptMask, DatabaseEnv}; use reth_primitives::{ - snapshot::{Filters, InclusionFilter}, - ChainSpec, Receipt, SnapshotSegment, + static_file::{Filters, InclusionFilter}, + Receipt, StaticFileSegment, }; use reth_provider::{ - providers::SnapshotProvider, BlockNumReader, ProviderError, ProviderFactory, ReceiptProvider, + providers::StaticFileProvider, BlockNumReader, ProviderError, ProviderFactory, ReceiptProvider, TransactionsProvider, TransactionsProviderExt, }; -use std::{ - path::{Path, PathBuf}, - sync::Arc, -}; +use std::{path::PathBuf, sync::Arc}; impl Command { - pub(crate) fn bench_receipts_snapshot( + pub(crate) fn bench_receipts_static_file( &self, - db_path: &Path, - log_level: Option, - chain: Arc, + provider_factory: Arc>, compression: Compression, inclusion_filter: InclusionFilter, phf: Option, ) -> eyre::Result<()> { - let db_args = DatabaseArguments::default().log_level(log_level); - - let factory = ProviderFactory::new(open_db_read_only(db_path, db_args)?, chain.clone()); - let provider = factory.provider()?; + let provider = provider_factory.provider()?; let tip = provider.last_block_number()?; - let block_range = - self.block_ranges(tip).first().expect("has been generated before").clone(); + let block_range = *self.block_ranges(tip).first().expect("has been generated before"); let filters = if let Some(phf) = self.with_filters.then_some(phf).flatten() { Filters::WithFilters(inclusion_filter, phf) @@ -44,19 +34,18 @@ impl Command { let mut rng = rand::thread_rng(); - let tx_range = ProviderFactory::new(open_db_read_only(db_path, db_args)?, chain.clone()) - .provider()? 
- .transaction_range_by_block_range(block_range.clone())?; + let tx_range = + provider_factory.provider()?.transaction_range_by_block_range(block_range.into())?; let mut row_indexes = tx_range.clone().collect::>(); - let path: PathBuf = SnapshotSegment::Receipts - .filename_with_configuration(filters, compression, &block_range, &tx_range) + let path: PathBuf = StaticFileSegment::Receipts + .filename_with_configuration(filters, compression, &block_range) .into(); - let provider = SnapshotProvider::new(PathBuf::default())?; + let provider = StaticFileProvider::new(PathBuf::default())?; let jar_provider = provider.get_segment_provider_from_block( - SnapshotSegment::Receipts, + StaticFileSegment::Receipts, self.from, Some(&path), )?; @@ -65,8 +54,8 @@ impl Command { for bench_kind in [BenchKind::Walk, BenchKind::RandomAll] { bench( bench_kind, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Receipts, + provider_factory.clone(), + StaticFileSegment::Receipts, filters, compression, || { @@ -96,8 +85,8 @@ impl Command { let num = row_indexes[rng.gen_range(0..row_indexes.len())]; bench( BenchKind::RandomOne, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Receipts, + provider_factory.clone(), + StaticFileSegment::Receipts, filters, compression, || { @@ -116,15 +105,15 @@ impl Command { // BENCHMARK QUERYING A RANDOM RECEIPT BY HASH { let num = row_indexes[rng.gen_range(0..row_indexes.len())] as u64; - let tx_hash = ProviderFactory::new(open_db_read_only(db_path, db_args)?, chain.clone()) + let tx_hash = provider_factory .transaction_by_id(num)? .ok_or(ProviderError::ReceiptNotFound(num.into()))? .hash(); bench( BenchKind::RandomHash, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Receipts, + provider_factory, + StaticFileSegment::Receipts, filters, compression, || { diff --git a/bin/reth/src/commands/db/snapshots/transactions.rs b/bin/reth/src/commands/db/static_files/transactions.rs similarity index 69% rename from bin/reth/src/commands/db/snapshots/transactions.rs rename to bin/reth/src/commands/db/static_files/transactions.rs index e7600c92b669..79758c09325e 100644 --- a/bin/reth/src/commands/db/snapshots/transactions.rs +++ b/bin/reth/src/commands/db/static_files/transactions.rs @@ -3,38 +3,29 @@ use super::{ Command, Compression, PerfectHashingFunction, }; use rand::{seq::SliceRandom, Rng}; -use reth_db::{mdbx::DatabaseArguments, open_db_read_only, snapshot::TransactionMask}; -use reth_interfaces::db::LogLevel; +use reth_db::{static_file::TransactionMask, DatabaseEnv}; + use reth_primitives::{ - snapshot::{Filters, InclusionFilter}, - ChainSpec, SnapshotSegment, TransactionSignedNoHash, + static_file::{Filters, InclusionFilter}, + StaticFileSegment, TransactionSignedNoHash, }; use reth_provider::{ - providers::SnapshotProvider, BlockNumReader, ProviderError, ProviderFactory, + providers::StaticFileProvider, BlockNumReader, ProviderError, ProviderFactory, TransactionsProvider, TransactionsProviderExt, }; -use std::{ - path::{Path, PathBuf}, - sync::Arc, -}; +use std::{path::PathBuf, sync::Arc}; impl Command { - pub(crate) fn bench_transactions_snapshot( + pub(crate) fn bench_transactions_static_file( &self, - db_path: &Path, - log_level: Option, - chain: Arc, + provider_factory: Arc>, compression: Compression, inclusion_filter: InclusionFilter, phf: Option, ) -> eyre::Result<()> { - let db_args = DatabaseArguments::default().log_level(log_level); - - let factory = 
ProviderFactory::new(open_db_read_only(db_path, db_args)?, chain.clone()); - let provider = factory.provider()?; + let provider = provider_factory.provider()?; let tip = provider.last_block_number()?; - let block_range = - self.block_ranges(tip).first().expect("has been generated before").clone(); + let block_range = *self.block_ranges(tip).first().expect("has been generated before"); let filters = if let Some(phf) = self.with_filters.then_some(phf).flatten() { Filters::WithFilters(inclusion_filter, phf) @@ -44,16 +35,16 @@ impl Command { let mut rng = rand::thread_rng(); - let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; + let tx_range = provider.transaction_range_by_block_range(block_range.into())?; let mut row_indexes = tx_range.clone().collect::>(); - let path: PathBuf = SnapshotSegment::Transactions - .filename_with_configuration(filters, compression, &block_range, &tx_range) + let path: PathBuf = StaticFileSegment::Transactions + .filename_with_configuration(filters, compression, &block_range) .into(); - let provider = SnapshotProvider::new(PathBuf::default())?; + let provider = StaticFileProvider::new(PathBuf::default())?; let jar_provider = provider.get_segment_provider_from_block( - SnapshotSegment::Transactions, + StaticFileSegment::Transactions, self.from, Some(&path), )?; @@ -62,8 +53,8 @@ impl Command { for bench_kind in [BenchKind::Walk, BenchKind::RandomAll] { bench( bench_kind, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Transactions, + provider_factory.clone(), + StaticFileSegment::Transactions, filters, compression, || { @@ -94,8 +85,8 @@ impl Command { let num = row_indexes[rng.gen_range(0..row_indexes.len())]; bench( BenchKind::RandomOne, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Transactions, + provider_factory.clone(), + StaticFileSegment::Transactions, filters, compression, || { @@ -115,16 +106,15 @@ impl Command { // BENCHMARK QUERYING A RANDOM TRANSACTION BY HASH { let num = row_indexes[rng.gen_range(0..row_indexes.len())] as u64; - let transaction_hash = - ProviderFactory::new(open_db_read_only(db_path, db_args)?, chain.clone()) - .transaction_by_id(num)? - .ok_or(ProviderError::TransactionNotFound(num.into()))? - .hash(); + let transaction_hash = provider_factory + .transaction_by_id(num)? + .ok_or(ProviderError::TransactionNotFound(num.into()))? + .hash(); bench( BenchKind::RandomHash, - (open_db_read_only(db_path, db_args)?, chain.clone()), - SnapshotSegment::Transactions, + provider_factory, + StaticFileSegment::Transactions, filters, compression, || { diff --git a/bin/reth/src/commands/db/stats.rs b/bin/reth/src/commands/db/stats.rs new file mode 100644 index 000000000000..09ad3a61b53e --- /dev/null +++ b/bin/reth/src/commands/db/stats.rs @@ -0,0 +1,290 @@ +use crate::utils::DbTool; +use clap::Parser; +use comfy_table::{Cell, Row, Table as ComfyTable}; +use eyre::WrapErr; +use human_bytes::human_bytes; +use itertools::Itertools; +use reth_db::{database::Database, mdbx, static_file::iter_static_files, DatabaseEnv, Tables}; +use reth_node_core::dirs::{ChainPath, DataDirPath}; +use reth_primitives::static_file::{find_fixed_range, SegmentRangeInclusive}; +use reth_provider::providers::StaticFileProvider; +use std::fs::File; + +#[derive(Parser, Debug)] +/// The arguments for the `reth db stats` command +pub struct Command { + /// Show only the total size for static files. 
+ #[arg(long, default_value_t = false)] + only_total_size: bool, + /// Show only the summary per static file segment. + #[arg(long, default_value_t = false)] + summary: bool, +} + +impl Command { + /// Execute `db stats` command + pub fn execute( + self, + data_dir: ChainPath, + tool: &DbTool, + ) -> eyre::Result<()> { + let static_files_stats_table = self.static_files_stats_table(data_dir)?; + println!("{static_files_stats_table}"); + + println!("\n"); + + let db_stats_table = self.db_stats_table(tool)?; + println!("{db_stats_table}"); + + Ok(()) + } + + fn db_stats_table(&self, tool: &DbTool) -> eyre::Result { + let mut table = ComfyTable::new(); + table.load_preset(comfy_table::presets::ASCII_MARKDOWN); + table.set_header([ + "Table Name", + "# Entries", + "Branch Pages", + "Leaf Pages", + "Overflow Pages", + "Total Size", + ]); + + tool.provider_factory.db_ref().view(|tx| { + let mut db_tables = Tables::ALL.iter().map(|table| table.name()).collect::>(); + db_tables.sort(); + let mut total_size = 0; + for db_table in db_tables { + let table_db = tx.inner.open_db(Some(db_table)).wrap_err("Could not open db.")?; + + let stats = tx + .inner + .db_stat(&table_db) + .wrap_err(format!("Could not find table: {db_table}"))?; + + // Defaults to 16KB right now but we should + // re-evaluate depending on the DB we end up using + // (e.g. REDB does not have these options as configurable intentionally) + let page_size = stats.page_size() as usize; + let leaf_pages = stats.leaf_pages(); + let branch_pages = stats.branch_pages(); + let overflow_pages = stats.overflow_pages(); + let num_pages = leaf_pages + branch_pages + overflow_pages; + let table_size = page_size * num_pages; + + total_size += table_size; + let mut row = Row::new(); + row.add_cell(Cell::new(db_table)) + .add_cell(Cell::new(stats.entries())) + .add_cell(Cell::new(branch_pages)) + .add_cell(Cell::new(leaf_pages)) + .add_cell(Cell::new(overflow_pages)) + .add_cell(Cell::new(human_bytes(table_size as f64))); + table.add_row(row); + } + + let max_widths = table.column_max_content_widths(); + let mut seperator = Row::new(); + for width in max_widths { + seperator.add_cell(Cell::new("-".repeat(width as usize))); + } + table.add_row(seperator); + + let mut row = Row::new(); + row.add_cell(Cell::new("Tables")) + .add_cell(Cell::new("")) + .add_cell(Cell::new("")) + .add_cell(Cell::new("")) + .add_cell(Cell::new("")) + .add_cell(Cell::new(human_bytes(total_size as f64))); + table.add_row(row); + + let freelist = tx.inner.env().freelist()?; + let freelist_size = + freelist * tx.inner.db_stat(&mdbx::Database::freelist_db())?.page_size() as usize; + + let mut row = Row::new(); + row.add_cell(Cell::new("Freelist")) + .add_cell(Cell::new(freelist)) + .add_cell(Cell::new("")) + .add_cell(Cell::new("")) + .add_cell(Cell::new("")) + .add_cell(Cell::new(human_bytes(freelist_size as f64))); + table.add_row(row); + + Ok::<(), eyre::Report>(()) + })??; + + Ok(table) + } + + fn static_files_stats_table( + &self, + data_dir: ChainPath, + ) -> eyre::Result { + let mut table = ComfyTable::new(); + table.load_preset(comfy_table::presets::ASCII_MARKDOWN); + + if !self.only_total_size { + table.set_header([ + "Segment", + "Block Range", + "Transaction Range", + "Shape (columns x rows)", + "Data Size", + "Index Size", + "Offsets Size", + "Config Size", + "Total Size", + ]); + } else { + table.set_header([ + "Segment", + "Block Range", + "Transaction Range", + "Shape (columns x rows)", + "Size", + ]); + } + + let static_files = 
iter_static_files(data_dir.static_files_path())?; + let static_file_provider = StaticFileProvider::new(data_dir.static_files_path())?; + + let mut total_data_size = 0; + let mut total_index_size = 0; + let mut total_offsets_size = 0; + let mut total_config_size = 0; + + for (segment, ranges) in static_files.into_iter().sorted_by_key(|(segment, _)| *segment) { + let ( + mut segment_columns, + mut segment_rows, + mut segment_data_size, + mut segment_index_size, + mut segment_offsets_size, + mut segment_config_size, + ) = (0, 0, 0, 0, 0, 0); + + for (block_range, tx_range) in &ranges { + let fixed_block_range = find_fixed_range(block_range.start()); + let jar_provider = static_file_provider + .get_segment_provider(segment, || Some(fixed_block_range), None)? + .expect("something went wrong"); + + let columns = jar_provider.columns(); + let rows = jar_provider.rows(); + let data_size = File::open(jar_provider.data_path()) + .and_then(|file| file.metadata()) + .map(|metadata| metadata.len()) + .unwrap_or_default(); + let index_size = File::open(jar_provider.index_path()) + .and_then(|file| file.metadata()) + .map(|metadata| metadata.len()) + .unwrap_or_default(); + let offsets_size = File::open(jar_provider.offsets_path()) + .and_then(|file| file.metadata()) + .map(|metadata| metadata.len()) + .unwrap_or_default(); + let config_size = File::open(jar_provider.config_path()) + .and_then(|file| file.metadata()) + .map(|metadata| metadata.len()) + .unwrap_or_default(); + + if self.summary { + if segment_columns > 0 { + assert_eq!(segment_columns, columns); + } else { + segment_columns = columns; + } + segment_rows += rows; + segment_data_size += data_size; + segment_index_size += index_size; + segment_offsets_size += offsets_size; + segment_config_size += config_size; + } else { + let mut row = Row::new(); + row.add_cell(Cell::new(segment)) + .add_cell(Cell::new(format!("{block_range}"))) + .add_cell(Cell::new( + tx_range.map_or("N/A".to_string(), |tx_range| format!("{tx_range}")), + )) + .add_cell(Cell::new(format!("{columns} x {rows}"))); + if !self.only_total_size { + row.add_cell(Cell::new(human_bytes(data_size as f64))) + .add_cell(Cell::new(human_bytes(index_size as f64))) + .add_cell(Cell::new(human_bytes(offsets_size as f64))) + .add_cell(Cell::new(human_bytes(config_size as f64))); + } + row.add_cell(Cell::new(human_bytes( + (data_size + index_size + offsets_size + config_size) as f64, + ))); + table.add_row(row); + } + + total_data_size += data_size; + total_index_size += index_size; + total_offsets_size += offsets_size; + total_config_size += config_size; + } + + if self.summary { + let first_ranges = ranges.first().expect("not empty list of ranges"); + let last_ranges = ranges.last().expect("not empty list of ranges"); + + let block_range = + SegmentRangeInclusive::new(first_ranges.0.start(), last_ranges.0.end()); + let tx_range = first_ranges + .1 + .zip(last_ranges.1) + .map(|(first, last)| SegmentRangeInclusive::new(first.start(), last.end())); + + let mut row = Row::new(); + row.add_cell(Cell::new(segment)) + .add_cell(Cell::new(format!("{block_range}"))) + .add_cell(Cell::new( + tx_range.map_or("N/A".to_string(), |tx_range| format!("{tx_range}")), + )) + .add_cell(Cell::new(format!("{segment_columns} x {segment_rows}"))); + if !self.only_total_size { + row.add_cell(Cell::new(human_bytes(segment_data_size as f64))) + .add_cell(Cell::new(human_bytes(segment_index_size as f64))) + .add_cell(Cell::new(human_bytes(segment_offsets_size as f64))) + 
.add_cell(Cell::new(human_bytes(segment_config_size as f64))); + } + row.add_cell(Cell::new(human_bytes( + (segment_data_size + + segment_index_size + + segment_offsets_size + + segment_config_size) as f64, + ))); + table.add_row(row); + } + } + + let max_widths = table.column_max_content_widths(); + let mut seperator = Row::new(); + for width in max_widths { + seperator.add_cell(Cell::new("-".repeat(width as usize))); + } + table.add_row(seperator); + + let mut row = Row::new(); + row.add_cell(Cell::new("Total")) + .add_cell(Cell::new("")) + .add_cell(Cell::new("")) + .add_cell(Cell::new("")); + if !self.only_total_size { + row.add_cell(Cell::new(human_bytes(total_data_size as f64))) + .add_cell(Cell::new(human_bytes(total_index_size as f64))) + .add_cell(Cell::new(human_bytes(total_offsets_size as f64))) + .add_cell(Cell::new(human_bytes(total_config_size as f64))); + } + row.add_cell(Cell::new(human_bytes( + (total_data_size + total_index_size + total_offsets_size + total_config_size) as f64, + ))); + table.add_row(row); + + Ok(table) + } +} diff --git a/bin/reth/src/commands/debug_cmd/build_block.rs b/bin/reth/src/commands/debug_cmd/build_block.rs index beb01873368e..67215aa54857 100644 --- a/bin/reth/src/commands/debug_cmd/build_block.rs +++ b/bin/reth/src/commands/debug_cmd/build_block.rs @@ -116,7 +116,11 @@ impl Command { /// /// If the database is empty, returns the genesis block. fn lookup_best_block(&self, db: Arc) -> RethResult> { - let factory = ProviderFactory::new(db, self.chain.clone()); + let factory = ProviderFactory::new( + db, + self.chain.clone(), + self.datadir.unwrap_or_chain_default(self.chain.chain).static_files_path(), + )?; let provider = factory.provider()?; let best_number = @@ -155,7 +159,11 @@ impl Command { // initialize the database let db = Arc::new(init_db(db_path, DatabaseArguments::default().log_level(self.db.log_level))?); - let provider_factory = ProviderFactory::new(Arc::clone(&db), Arc::clone(&self.chain)); + let provider_factory = ProviderFactory::new( + Arc::clone(&db), + Arc::clone(&self.chain), + data_dir.static_files_path(), + )?; let consensus: Arc = Arc::new(BeaconConsensus::new(Arc::clone(&self.chain))); diff --git a/bin/reth/src/commands/debug_cmd/execution.rs b/bin/reth/src/commands/debug_cmd/execution.rs index 542a50b5e675..565852b7d7a5 100644 --- a/bin/reth/src/commands/debug_cmd/execution.rs +++ b/bin/reth/src/commands/debug_cmd/execution.rs @@ -27,13 +27,16 @@ use reth_network::{NetworkEvents, NetworkHandle}; use reth_network_api::NetworkInfo; use reth_node_core::init::init_genesis; use reth_node_ethereum::EthEvmConfig; -use reth_primitives::{fs, stage::StageId, BlockHashOrNumber, BlockNumber, ChainSpec, B256}; +use reth_primitives::{ + fs, stage::StageId, BlockHashOrNumber, BlockNumber, ChainSpec, PruneModes, B256, +}; use reth_provider::{BlockExecutionWriter, HeaderSyncMode, ProviderFactory, StageCheckpointReader}; use reth_stages::{ sets::DefaultStages, - stages::{ExecutionStage, ExecutionStageThresholds, SenderRecoveryStage, TotalDifficultyStage}, + stages::{ExecutionStage, ExecutionStageThresholds, SenderRecoveryStage}, Pipeline, StageSet, }; +use reth_static_file::StaticFileProducer; use reth_tasks::TaskExecutor; use std::{ net::{SocketAddr, SocketAddrV4}, @@ -92,6 +95,7 @@ impl Command { consensus: Arc, provider_factory: ProviderFactory, task_executor: &TaskExecutor, + static_file_producer: StaticFileProducer, ) -> eyre::Result> where DB: Database + Unpin + Clone + 'static, @@ -123,11 +127,7 @@ impl Command { header_downloader, 
body_downloader, factory.clone(), - ) - .set( - TotalDifficultyStage::new(consensus) - .with_commit_threshold(stage_conf.total_difficulty.commit_threshold), - ) + )? .set(SenderRecoveryStage { commit_threshold: stage_conf.sender_recovery.commit_threshold, }) @@ -147,7 +147,7 @@ impl Command { config.prune.clone().map(|prune| prune.segments).unwrap_or_default(), )), ) - .build(provider_factory); + .build(provider_factory, static_file_producer); Ok(pipeline) } @@ -170,7 +170,11 @@ impl Command { self.network.discovery.addr, self.network.discovery.port, ))) - .build(ProviderFactory::new(db, self.chain.clone())) + .build(ProviderFactory::new( + db, + self.chain.clone(), + self.datadir.unwrap_or_chain_default(self.chain.chain).static_files_path(), + )?) .start_network() .await?; info!(target: "reth::cli", peer_id = %network.peer_id(), local_addr = %network.local_addr(), "Connected to P2P network"); @@ -206,10 +210,11 @@ impl Command { fs::create_dir_all(&db_path)?; let db = Arc::new(init_db(db_path, DatabaseArguments::default().log_level(self.db.log_level))?); - let provider_factory = ProviderFactory::new(db.clone(), self.chain.clone()); + let provider_factory = + ProviderFactory::new(db.clone(), self.chain.clone(), data_dir.static_files_path())?; debug!(target: "reth::cli", chain=%self.chain.chain, genesis=?self.chain.genesis_hash(), "Initializing genesis"); - init_genesis(db.clone(), self.chain.clone())?; + init_genesis(provider_factory.clone())?; let consensus: Arc = Arc::new(BeaconConsensus::new(Arc::clone(&self.chain))); @@ -226,6 +231,12 @@ impl Command { ) .await?; + let static_file_producer = StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ); + // Configure the pipeline let fetch_client = network.fetch_client().await?; let mut pipeline = self.build_pipeline( @@ -234,6 +245,7 @@ impl Command { Arc::clone(&consensus), provider_factory.clone(), &ctx.task_executor, + static_file_producer, )?; let provider = provider_factory.provider()?; diff --git a/bin/reth/src/commands/debug_cmd/in_memory_merkle.rs b/bin/reth/src/commands/debug_cmd/in_memory_merkle.rs index fffda86b42c9..54af17e97be1 100644 --- a/bin/reth/src/commands/debug_cmd/in_memory_merkle.rs +++ b/bin/reth/src/commands/debug_cmd/in_memory_merkle.rs @@ -20,9 +20,8 @@ use reth_network_api::NetworkInfo; use reth_node_ethereum::EthEvmConfig; use reth_primitives::{fs, stage::StageId, BlockHashOrNumber, ChainSpec}; use reth_provider::{ - AccountExtReader, BlockWriter, ExecutorFactory, HashingWriter, HeaderProvider, - LatestStateProviderRef, OriginalValuesKnown, ProviderFactory, StageCheckpointReader, - StorageReader, + AccountExtReader, ExecutorFactory, HashingWriter, HeaderProvider, LatestStateProviderRef, + OriginalValuesKnown, ProviderFactory, StageCheckpointReader, StorageReader, }; use reth_tasks::TaskExecutor; use reth_trie::{updates::TrieKey, StateRoot}; @@ -95,7 +94,11 @@ impl Command { self.network.discovery.addr, self.network.discovery.port, ))) - .build(ProviderFactory::new(db, self.chain.clone())) + .build(ProviderFactory::new( + db, + self.chain.clone(), + self.datadir.unwrap_or_chain_default(self.chain.chain).static_files_path(), + )?) 
.start_network() .await?; info!(target: "reth::cli", peer_id = %network.peer_id(), local_addr = %network.local_addr(), "Connected to P2P network"); @@ -115,7 +118,7 @@ impl Command { // initialize the database let db = Arc::new(init_db(db_path, DatabaseArguments::default().log_level(self.db.log_level))?); - let factory = ProviderFactory::new(&db, self.chain.clone()); + let factory = ProviderFactory::new(&db, self.chain.clone(), data_dir.static_files_path())?; let provider = factory.provider()?; // Look up merkle checkpoint @@ -165,8 +168,10 @@ impl Command { let executor_factory = reth_revm::EvmProcessorFactory::new(self.chain.clone(), EthEvmConfig::default()); - let mut executor = - executor_factory.with_state(LatestStateProviderRef::new(provider.tx_ref())); + let mut executor = executor_factory.with_state(LatestStateProviderRef::new( + provider.tx_ref(), + factory.static_file_provider(), + )); let merkle_block_td = provider.header_td_by_number(merkle_block_number)?.unwrap_or_default(); @@ -192,14 +197,14 @@ impl Command { let provider_rw = factory.provider_rw()?; // Insert block, state and hashes - provider_rw.insert_block( + provider_rw.insert_historical_block( block .clone() .try_seal_with_senders() .map_err(|_| BlockValidationError::SenderRecoveryError)?, None, )?; - block_state.write_to_db(provider_rw.tx_ref(), OriginalValuesKnown::No)?; + block_state.write_to_storage(provider_rw.tx_ref(), None, OriginalValuesKnown::No)?; let storage_lists = provider_rw.changed_storages_with_range(block.number..=block.number)?; let storages = provider_rw.plain_state_storages(storage_lists)?; provider_rw.insert_storage_for_hashing(storages)?; diff --git a/bin/reth/src/commands/debug_cmd/merkle.rs b/bin/reth/src/commands/debug_cmd/merkle.rs index dbc4879938e6..8a5cce295a26 100644 --- a/bin/reth/src/commands/debug_cmd/merkle.rs +++ b/bin/reth/src/commands/debug_cmd/merkle.rs @@ -105,7 +105,11 @@ impl Command { self.network.discovery.addr, self.network.discovery.port, ))) - .build(ProviderFactory::new(db, self.chain.clone())) + .build(ProviderFactory::new( + db, + self.chain.clone(), + self.datadir.unwrap_or_chain_default(self.chain.chain).static_files_path(), + )?) 
.start_network() .await?; info!(target: "reth::cli", peer_id = %network.peer_id(), local_addr = %network.local_addr(), "Connected to P2P network"); @@ -125,7 +129,7 @@ impl Command { // initialize the database let db = Arc::new(init_db(db_path, DatabaseArguments::default().log_level(self.db.log_level))?); - let factory = ProviderFactory::new(&db, self.chain.clone()); + let factory = ProviderFactory::new(&db, self.chain.clone(), data_dir.static_files_path())?; let provider_rw = factory.provider_rw()?; // Configure and build network diff --git a/bin/reth/src/commands/debug_cmd/replay_engine.rs b/bin/reth/src/commands/debug_cmd/replay_engine.rs index a18b9a9568a4..374844d1bbc6 100644 --- a/bin/reth/src/commands/debug_cmd/replay_engine.rs +++ b/bin/reth/src/commands/debug_cmd/replay_engine.rs @@ -25,10 +25,11 @@ use reth_node_ethereum::{EthEngineTypes, EthEvmConfig}; #[cfg(feature = "optimism")] use reth_node_optimism::{OptimismEngineTypes, OptimismEvmConfig}; use reth_payload_builder::{PayloadBuilderHandle, PayloadBuilderService}; -use reth_primitives::{fs, ChainSpec}; +use reth_primitives::{fs, ChainSpec, PruneModes}; use reth_provider::{providers::BlockchainProvider, CanonStateSubscriptions, ProviderFactory}; use reth_revm::EvmProcessorFactory; use reth_stages::Pipeline; +use reth_static_file::StaticFileProducer; use reth_tasks::TaskExecutor; use reth_transaction_pool::noop::NoopTransactionPool; use std::{ @@ -101,7 +102,11 @@ impl Command { self.network.discovery.addr, self.network.discovery.port, ))) - .build(ProviderFactory::new(db, self.chain.clone())) + .build(ProviderFactory::new( + db, + self.chain.clone(), + self.datadir.unwrap_or_chain_default(self.chain.chain).static_files_path(), + )?) .start_network() .await?; info!(target: "reth::cli", peer_id = %network.peer_id(), local_addr = %network.local_addr(), "Connected to P2P network"); @@ -121,7 +126,8 @@ impl Command { // Initialize the database let db = Arc::new(init_db(db_path, DatabaseArguments::default().log_level(self.db.log_level))?); - let provider_factory = ProviderFactory::new(db.clone(), self.chain.clone()); + let provider_factory = + ProviderFactory::new(db.clone(), self.chain.clone(), data_dir.static_files_path())?; let consensus: Arc = Arc::new(BeaconConsensus::new(Arc::clone(&self.chain))); @@ -191,7 +197,14 @@ impl Command { let (consensus_engine_tx, consensus_engine_rx) = mpsc::unbounded_channel(); let (beacon_consensus_engine, beacon_engine_handle) = BeaconConsensusEngine::with_channel( network_client, - Pipeline::builder().build(provider_factory), + Pipeline::builder().build( + provider_factory.clone(), + StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ), + ), blockchain_db.clone(), Box::new(ctx.task_executor.clone()), Box::new(network), diff --git a/bin/reth/src/commands/import.rs b/bin/reth/src/commands/import.rs index ff0cf6080c42..f492a456d23c 100644 --- a/bin/reth/src/commands/import.rs +++ b/bin/reth/src/commands/import.rs @@ -21,12 +21,13 @@ use reth_downloaders::{ use reth_interfaces::consensus::Consensus; use reth_node_core::{events::node::NodeEvent, init::init_genesis}; use reth_node_ethereum::EthEvmConfig; -use reth_primitives::{stage::StageId, ChainSpec, B256}; +use reth_primitives::{stage::StageId, ChainSpec, PruneModes, B256}; use reth_provider::{HeaderSyncMode, ProviderFactory, StageCheckpointReader}; use reth_stages::{ prelude::*, - stages::{ExecutionStage, ExecutionStageThresholds, SenderRecoveryStage, TotalDifficultyStage}, 
+ stages::{ExecutionStage, ExecutionStageThresholds, SenderRecoveryStage}, }; +use reth_static_file::StaticFileProducer; use std::{path::PathBuf, sync::Arc}; use tokio::sync::watch; use tracing::{debug, info}; @@ -89,11 +90,12 @@ impl ImportCommand { let db = Arc::new(init_db(db_path, DatabaseArguments::default().log_level(self.db.log_level))?); info!(target: "reth::cli", "Database opened"); - let provider_factory = ProviderFactory::new(db.clone(), self.chain.clone()); + let provider_factory = + ProviderFactory::new(db.clone(), self.chain.clone(), data_dir.static_files_path())?; debug!(target: "reth::cli", chain=%self.chain.chain, genesis=?self.chain.genesis_hash(), "Initializing genesis"); - init_genesis(db.clone(), self.chain.clone())?; + init_genesis(provider_factory.clone())?; let consensus = Arc::new(BeaconConsensus::new(self.chain.clone())); info!(target: "reth::cli", "Consensus engine initialized"); @@ -106,8 +108,20 @@ impl ImportCommand { let tip = file_client.tip().expect("file client has no tip"); info!(target: "reth::cli", "Chain file imported"); + let static_file_producer = StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ); + let (mut pipeline, events) = self - .build_import_pipeline(config, provider_factory.clone(), &consensus, file_client) + .build_import_pipeline( + config, + provider_factory.clone(), + &consensus, + file_client, + static_file_producer, + ) .await?; // override the tip @@ -142,6 +156,7 @@ impl ImportCommand { provider_factory: ProviderFactory, consensus: &Arc, file_client: Arc, + static_file_producer: StaticFileProducer, ) -> eyre::Result<(Pipeline, impl Stream)> where DB: Database + Clone + Unpin + 'static, @@ -176,11 +191,7 @@ impl ImportCommand { header_downloader, body_downloader, factory.clone(), - ) - .set( - TotalDifficultyStage::new(consensus.clone()) - .with_commit_threshold(config.stages.total_difficulty.commit_threshold), - ) + )? 
.set(SenderRecoveryStage { commit_threshold: config.stages.sender_recovery.commit_threshold, }) @@ -201,7 +212,7 @@ impl ImportCommand { config.prune.map(|prune| prune.segments).unwrap_or_default(), )), ) - .build(provider_factory); + .build(provider_factory, static_file_producer); let events = pipeline.events().map(Into::into); diff --git a/bin/reth/src/commands/init_cmd.rs b/bin/reth/src/commands/init_cmd.rs index 9ea949e6b479..abd3edc5bddf 100644 --- a/bin/reth/src/commands/init_cmd.rs +++ b/bin/reth/src/commands/init_cmd.rs @@ -11,6 +11,7 @@ use clap::Parser; use reth_db::{init_db, mdbx::DatabaseArguments}; use reth_node_core::init::init_genesis; use reth_primitives::ChainSpec; +use reth_provider::ProviderFactory; use std::sync::Arc; use tracing::info; @@ -56,8 +57,12 @@ impl InitCommand { Arc::new(init_db(&db_path, DatabaseArguments::default().log_level(self.db.log_level))?); info!(target: "reth::cli", "Database opened"); + let provider_factory = + ProviderFactory::new(db.clone(), self.chain.clone(), data_dir.static_files_path())?; + info!(target: "reth::cli", "Writing genesis block"); - let hash = init_genesis(db, self.chain)?; + + let hash = init_genesis(provider_factory)?; info!(target: "reth::cli", hash = ?hash, "Genesis block written"); Ok(()) diff --git a/bin/reth/src/commands/p2p/mod.rs b/bin/reth/src/commands/p2p/mod.rs index f307b0dcd0dd..0dbcd6c270bf 100644 --- a/bin/reth/src/commands/p2p/mod.rs +++ b/bin/reth/src/commands/p2p/mod.rs @@ -131,7 +131,11 @@ impl Command { network_config_builder = self.discovery.apply_to_builder(network_config_builder); let network = network_config_builder - .build(Arc::new(ProviderFactory::new(noop_db, self.chain.clone()))) + .build(Arc::new(ProviderFactory::new( + noop_db, + self.chain.clone(), + data_dir.static_files_path(), + )?)) .start_network() .await?; diff --git a/bin/reth/src/commands/recover/storage_tries.rs b/bin/reth/src/commands/recover/storage_tries.rs index 37e03743cbb7..b0b2550cbb1a 100644 --- a/bin/reth/src/commands/recover/storage_tries.rs +++ b/bin/reth/src/commands/recover/storage_tries.rs @@ -50,10 +50,11 @@ impl Command { fs::create_dir_all(&db_path)?; let db = Arc::new(init_db(db_path, Default::default())?); + let factory = ProviderFactory::new(&db, self.chain.clone(), data_dir.static_files_path())?; + debug!(target: "reth::cli", chain=%self.chain.chain, genesis=?self.chain.genesis_hash(), "Initializing genesis"); - init_genesis(db.clone(), self.chain.clone())?; + init_genesis(factory.clone())?; - let factory = ProviderFactory::new(&db, self.chain); let mut provider = factory.provider_rw()?; let best_block = provider.best_block_number()?; let best_header = provider @@ -62,7 +63,7 @@ impl Command { let mut deleted_tries = 0; let tx_mut = provider.tx_mut(); - let mut hashed_account_cursor = tx_mut.cursor_read::()?; + let mut hashed_account_cursor = tx_mut.cursor_read::()?; let mut storage_trie_cursor = tx_mut.cursor_dup_read::()?; let mut entry = storage_trie_cursor.first()?; diff --git a/bin/reth/src/commands/stage/drop.rs b/bin/reth/src/commands/stage/drop.rs index 3187932c2a08..3a5620331c97 100644 --- a/bin/reth/src/commands/stage/drop.rs +++ b/bin/reth/src/commands/stage/drop.rs @@ -10,12 +10,15 @@ use crate::{ }; use clap::Parser; use reth_db::{ - database::Database, mdbx::DatabaseArguments, open_db, tables, transaction::DbTxMut, DatabaseEnv, + database::Database, mdbx::DatabaseArguments, open_db, static_file::iter_static_files, tables, + transaction::DbTxMut, DatabaseEnv, }; use 
reth_node_core::init::{insert_genesis_header, insert_genesis_state}; -use reth_primitives::{fs, stage::StageId, ChainSpec}; +use reth_primitives::{ + fs, stage::StageId, static_file::find_fixed_range, ChainSpec, StaticFileSegment, +}; +use reth_provider::ProviderFactory; use std::sync::Arc; -use tracing::info; /// `reth drop-stage` command #[derive(Debug, Parser)] @@ -58,23 +61,59 @@ impl Command { let db = open_db(db_path.as_ref(), DatabaseArguments::default().log_level(self.db.log_level))?; + let provider_factory = + ProviderFactory::new(db, self.chain.clone(), data_dir.static_files_path())?; + let static_file_provider = provider_factory.static_file_provider(); + + let tool = DbTool::new(provider_factory, self.chain.clone())?; - let tool = DbTool::new(&db, self.chain.clone())?; + let static_file_segment = match self.stage { + StageEnum::Headers => Some(StaticFileSegment::Headers), + StageEnum::Bodies => Some(StaticFileSegment::Transactions), + StageEnum::Execution => Some(StaticFileSegment::Receipts), + _ => None, + }; - tool.db.update(|tx| { - match &self.stage { + // Delete static file segment data before inserting the genesis header below + if let Some(static_file_segment) = static_file_segment { + let static_file_provider = tool.provider_factory.static_file_provider(); + let static_files = iter_static_files(static_file_provider.directory())?; + if let Some(segment_static_files) = static_files.get(&static_file_segment) { + for (block_range, _) in segment_static_files { + static_file_provider + .delete_jar(static_file_segment, find_fixed_range(block_range.start()))?; + } + } + } + + tool.provider_factory.db_ref().update(|tx| { + match self.stage { + StageEnum::Headers => { + tx.clear::()?; + tx.clear::()?; + tx.clear::()?; + tx.clear::()?; + tx.put::( + StageId::Headers.to_string(), + Default::default(), + )?; + insert_genesis_header::(tx, static_file_provider, self.chain)?; + } StageEnum::Bodies => { tx.clear::()?; tx.clear::()?; - tx.clear::()?; + tx.clear::()?; tx.clear::()?; tx.clear::()?; - tx.put::(StageId::Bodies.to_string(), Default::default())?; - insert_genesis_header::(tx, self.chain)?; + tx.put::( + StageId::Bodies.to_string(), + Default::default(), + )?; + insert_genesis_header::(tx, static_file_provider, self.chain)?; } StageEnum::Senders => { - tx.clear::()?; - tx.put::( + tx.clear::()?; + tx.put::( StageId::SenderRecovery.to_string(), Default::default(), )?; @@ -82,41 +121,41 @@ impl Command { StageEnum::Execution => { tx.clear::()?; tx.clear::()?; - tx.clear::()?; - tx.clear::()?; + tx.clear::()?; + tx.clear::()?; tx.clear::()?; tx.clear::()?; - tx.put::( + tx.put::( StageId::Execution.to_string(), Default::default(), )?; insert_genesis_state::(tx, self.chain.genesis())?; } StageEnum::AccountHashing => { - tx.clear::()?; - tx.put::( + tx.clear::()?; + tx.put::( StageId::AccountHashing.to_string(), Default::default(), )?; } StageEnum::StorageHashing => { - tx.clear::()?; - tx.put::( + tx.clear::()?; + tx.put::( StageId::StorageHashing.to_string(), Default::default(), )?; } StageEnum::Hashing => { // Clear hashed accounts - tx.clear::()?; - tx.put::( + tx.clear::()?; + tx.put::( StageId::AccountHashing.to_string(), Default::default(), )?; // Clear hashed storages - tx.clear::()?; - tx.put::( + tx.clear::()?; + tx.put::( StageId::StorageHashing.to_string(), Default::default(), )?; @@ -124,54 +163,42 @@ impl Command { StageEnum::Merkle => { tx.clear::()?; tx.clear::()?; - tx.put::( + tx.put::( StageId::MerkleExecute.to_string(), Default::default(), )?; - tx.put::( + 
tx.put::( StageId::MerkleUnwind.to_string(), Default::default(), )?; - tx.delete::( + tx.delete::( StageId::MerkleExecute.to_string(), None, )?; } StageEnum::AccountHistory | StageEnum::StorageHistory => { - tx.clear::()?; - tx.clear::()?; - tx.put::( + tx.clear::()?; + tx.clear::()?; + tx.put::( StageId::IndexAccountHistory.to_string(), Default::default(), )?; - tx.put::( + tx.put::( StageId::IndexStorageHistory.to_string(), Default::default(), )?; } - StageEnum::TotalDifficulty => { - tx.clear::()?; - tx.put::( - StageId::TotalDifficulty.to_string(), - Default::default(), - )?; - insert_genesis_header::(tx, self.chain)?; - } StageEnum::TxLookup => { - tx.clear::()?; - tx.put::( + tx.clear::()?; + tx.put::( StageId::TransactionLookup.to_string(), Default::default(), )?; - insert_genesis_header::(tx, self.chain)?; - } - _ => { - info!("Nothing to do for stage {:?}", self.stage); - return Ok(()) + insert_genesis_header::(tx, static_file_provider, self.chain)?; } } - tx.put::(StageId::Finish.to_string(), Default::default())?; + tx.put::(StageId::Finish.to_string(), Default::default())?; Ok::<_, eyre::Error>(()) })??; diff --git a/bin/reth/src/commands/stage/dump/execution.rs b/bin/reth/src/commands/stage/dump/execution.rs index b6a2c94cf80e..3234367fe8ac 100644 --- a/bin/reth/src/commands/stage/dump/execution.rs +++ b/bin/reth/src/commands/stage/dump/execution.rs @@ -5,29 +5,38 @@ use reth_db::{ cursor::DbCursorRO, database::Database, table::TableImporter, tables, transaction::DbTx, DatabaseEnv, }; +use reth_node_core::dirs::{ChainPath, DataDirPath}; use reth_node_ethereum::EthEvmConfig; -use reth_primitives::{stage::StageCheckpoint, ChainSpec}; -use reth_provider::ProviderFactory; +use reth_primitives::stage::StageCheckpoint; +use reth_provider::{ChainSpecProvider, ProviderFactory}; use reth_revm::EvmProcessorFactory; use reth_stages::{stages::ExecutionStage, Stage, UnwindInput}; -use std::{path::PathBuf, sync::Arc}; use tracing::info; pub(crate) async fn dump_execution_stage( - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, from: u64, to: u64, - output_db: &PathBuf, + output_datadir: ChainPath, should_run: bool, ) -> Result<()> { - let (output_db, tip_block_number) = setup(from, to, output_db, db_tool)?; + let (output_db, tip_block_number) = setup(from, to, &output_datadir.db_path(), db_tool)?; import_tables_with_range(&output_db, db_tool, from, to)?; unwind_and_copy(db_tool, from, tip_block_number, &output_db).await?; if should_run { - dry_run(db_tool.chain.clone(), output_db, to, from).await?; + dry_run( + ProviderFactory::new( + output_db, + db_tool.chain.clone(), + output_datadir.static_files_path(), + )?, + to, + from, + ) + .await?; } Ok(()) @@ -36,30 +45,50 @@ pub(crate) async fn dump_execution_stage( /// Imports all the tables that can be copied over a range. fn import_tables_with_range( output_db: &DatabaseEnv, - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, from: u64, to: u64, ) -> eyre::Result<()> { // We're not sharing the transaction in case the memory grows too much. 
output_db.update(|tx| { - tx.import_table_with_range::(&db_tool.db.tx()?, Some(from), to) + tx.import_table_with_range::( + &db_tool.provider_factory.db_ref().tx()?, + Some(from), + to, + ) })??; output_db.update(|tx| { - tx.import_table_with_range::(&db_tool.db.tx()?, Some(from), to) + tx.import_table_with_range::( + &db_tool.provider_factory.db_ref().tx()?, + Some(from), + to, + ) })??; output_db.update(|tx| { - tx.import_table_with_range::(&db_tool.db.tx()?, Some(from), to) + tx.import_table_with_range::( + &db_tool.provider_factory.db_ref().tx()?, + Some(from), + to, + ) })??; output_db.update(|tx| { - tx.import_table_with_range::(&db_tool.db.tx()?, Some(from), to) + tx.import_table_with_range::( + &db_tool.provider_factory.db_ref().tx()?, + Some(from), + to, + ) })??; output_db.update(|tx| { - tx.import_table_with_range::(&db_tool.db.tx()?, Some(from), to) + tx.import_table_with_range::( + &db_tool.provider_factory.db_ref().tx()?, + Some(from), + to, + ) })??; // Find range of transactions that need to be copied over - let (from_tx, to_tx) = db_tool.db.view(|read_tx| { + let (from_tx, to_tx) = db_tool.provider_factory.db_ref().view(|read_tx| { let mut read_cursor = read_tx.cursor_read::()?; let (_, from_block) = read_cursor.seek(from)?.ok_or(eyre::eyre!("BlockBody {from} does not exist."))?; @@ -74,14 +103,18 @@ fn import_tables_with_range( output_db.update(|tx| { tx.import_table_with_range::( - &db_tool.db.tx()?, + &db_tool.provider_factory.db_ref().tx()?, Some(from_tx), to_tx, ) })??; output_db.update(|tx| { - tx.import_table_with_range::(&db_tool.db.tx()?, Some(from_tx), to_tx) + tx.import_table_with_range::( + &db_tool.provider_factory.db_ref().tx()?, + Some(from_tx), + to_tx, + ) })??; Ok(()) @@ -91,13 +124,12 @@ fn import_tables_with_range( /// PlainAccountState safely. There might be some state dependency from an address /// which hasn't been changed in the given range. async fn unwind_and_copy( - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, from: u64, tip_block_number: u64, output_db: &DatabaseEnv, ) -> eyre::Result<()> { - let factory = ProviderFactory::new(db_tool.db, db_tool.chain.clone()); - let provider = factory.provider_rw()?; + let provider = db_tool.provider_factory.provider_rw()?; let mut exec_stage = ExecutionStage::new_with_factory(EvmProcessorFactory::new( db_tool.chain.clone(), @@ -125,22 +157,20 @@ async fn unwind_and_copy( /// Try to re-execute the stage without committing async fn dry_run( - chain: Arc, - output_db: DB, + output_provider_factory: ProviderFactory, to: u64, from: u64, ) -> eyre::Result<()> { info!(target: "reth::cli", "Executing stage. 
[dry-run]"); - let factory = ProviderFactory::new(&output_db, chain.clone()); let mut exec_stage = ExecutionStage::new_with_factory(EvmProcessorFactory::new( - chain.clone(), + output_provider_factory.chain_spec().clone(), EthEvmConfig::default(), )); let input = reth_stages::ExecInput { target: Some(to), checkpoint: Some(StageCheckpoint::new(from)) }; - exec_stage.execute(&factory.provider_rw()?, input)?; + exec_stage.execute(&output_provider_factory.provider_rw()?, input)?; info!(target: "reth::cli", "Success"); diff --git a/bin/reth/src/commands/stage/dump/hashing_account.rs b/bin/reth/src/commands/stage/dump/hashing_account.rs index 7fe723257f69..1888f0e303e7 100644 --- a/bin/reth/src/commands/stage/dump/hashing_account.rs +++ b/bin/reth/src/commands/stage/dump/hashing_account.rs @@ -2,30 +2,43 @@ use super::setup; use crate::utils::DbTool; use eyre::Result; use reth_db::{database::Database, table::TableImporter, tables, DatabaseEnv}; -use reth_primitives::{stage::StageCheckpoint, BlockNumber, ChainSpec}; +use reth_node_core::dirs::{ChainPath, DataDirPath}; +use reth_primitives::{stage::StageCheckpoint, BlockNumber}; use reth_provider::ProviderFactory; use reth_stages::{stages::AccountHashingStage, Stage, UnwindInput}; -use std::{path::PathBuf, sync::Arc}; use tracing::info; pub(crate) async fn dump_hashing_account_stage( - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, from: BlockNumber, to: BlockNumber, - output_db: &PathBuf, + output_datadir: ChainPath, should_run: bool, ) -> Result<()> { - let (output_db, tip_block_number) = setup(from, to, output_db, db_tool)?; + let (output_db, tip_block_number) = setup(from, to, &output_datadir.db_path(), db_tool)?; // Import relevant AccountChangeSets output_db.update(|tx| { - tx.import_table_with_range::(&db_tool.db.tx()?, Some(from), to) + tx.import_table_with_range::( + &db_tool.provider_factory.db_ref().tx()?, + Some(from), + to, + ) })??; unwind_and_copy(db_tool, from, tip_block_number, &output_db)?; if should_run { - dry_run(db_tool.chain.clone(), output_db, to, from).await?; + dry_run( + ProviderFactory::new( + output_db, + db_tool.chain.clone(), + output_datadir.static_files_path(), + )?, + to, + from, + ) + .await?; } Ok(()) @@ -33,13 +46,12 @@ pub(crate) async fn dump_hashing_account_stage( /// Dry-run an unwind to FROM block and copy the necessary table data to the new database. 
fn unwind_and_copy( - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, from: u64, tip_block_number: u64, output_db: &DatabaseEnv, ) -> eyre::Result<()> { - let factory = ProviderFactory::new(db_tool.db, db_tool.chain.clone()); - let provider = factory.provider_rw()?; + let provider = db_tool.provider_factory.provider_rw()?; let mut exec_stage = AccountHashingStage::default(); exec_stage.unwind( @@ -59,15 +71,13 @@ fn unwind_and_copy( /// Try to re-execute the stage straightaway async fn dry_run( - chain: Arc, - output_db: DB, + output_provider_factory: ProviderFactory, to: u64, from: u64, ) -> eyre::Result<()> { info!(target: "reth::cli", "Executing stage."); - let factory = ProviderFactory::new(&output_db, chain); - let provider = factory.provider_rw()?; + let provider = output_provider_factory.provider_rw()?; let mut stage = AccountHashingStage { clean_threshold: 1, // Forces hashing from scratch ..Default::default() diff --git a/bin/reth/src/commands/stage/dump/hashing_storage.rs b/bin/reth/src/commands/stage/dump/hashing_storage.rs index 373818072529..7f827b25cd7a 100644 --- a/bin/reth/src/commands/stage/dump/hashing_storage.rs +++ b/bin/reth/src/commands/stage/dump/hashing_storage.rs @@ -2,25 +2,34 @@ use super::setup; use crate::utils::DbTool; use eyre::Result; use reth_db::{database::Database, table::TableImporter, tables, DatabaseEnv}; -use reth_primitives::{stage::StageCheckpoint, ChainSpec}; +use reth_node_core::dirs::{ChainPath, DataDirPath}; +use reth_primitives::stage::StageCheckpoint; use reth_provider::ProviderFactory; use reth_stages::{stages::StorageHashingStage, Stage, UnwindInput}; -use std::{path::PathBuf, sync::Arc}; use tracing::info; pub(crate) async fn dump_hashing_storage_stage( - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, from: u64, to: u64, - output_db: &PathBuf, + output_datadir: ChainPath, should_run: bool, ) -> Result<()> { - let (output_db, tip_block_number) = setup(from, to, output_db, db_tool)?; + let (output_db, tip_block_number) = setup(from, to, &output_datadir.db_path(), db_tool)?; unwind_and_copy(db_tool, from, tip_block_number, &output_db)?; if should_run { - dry_run(db_tool.chain.clone(), output_db, to, from).await?; + dry_run( + ProviderFactory::new( + output_db, + db_tool.chain.clone(), + output_datadir.static_files_path(), + )?, + to, + from, + ) + .await?; } Ok(()) @@ -28,13 +37,12 @@ pub(crate) async fn dump_hashing_storage_stage( /// Dry-run an unwind to FROM block and copy the necessary table data to the new database. 
fn unwind_and_copy( - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, from: u64, tip_block_number: u64, output_db: &DatabaseEnv, ) -> eyre::Result<()> { - let factory = ProviderFactory::new(db_tool.db, db_tool.chain.clone()); - let provider = factory.provider_rw()?; + let provider = db_tool.provider_factory.provider_rw()?; let mut exec_stage = StorageHashingStage::default(); @@ -51,22 +59,21 @@ fn unwind_and_copy( // TODO optimize we can actually just get the entries we need for both these tables output_db .update(|tx| tx.import_dupsort::(&unwind_inner_tx))??; - output_db.update(|tx| tx.import_dupsort::(&unwind_inner_tx))??; + output_db + .update(|tx| tx.import_dupsort::(&unwind_inner_tx))??; Ok(()) } /// Try to re-execute the stage straightaway async fn dry_run( - chain: Arc, - output_db: DB, + output_provider_factory: ProviderFactory, to: u64, from: u64, ) -> eyre::Result<()> { info!(target: "reth::cli", "Executing stage."); - let factory = ProviderFactory::new(&output_db, chain); - let provider = factory.provider_rw()?; + let provider = output_provider_factory.provider_rw()?; let mut stage = StorageHashingStage { clean_threshold: 1, // Forces hashing from scratch ..Default::default() diff --git a/bin/reth/src/commands/stage/dump/merkle.rs b/bin/reth/src/commands/stage/dump/merkle.rs index f0f1fc233f62..3e6d2e6352ce 100644 --- a/bin/reth/src/commands/stage/dump/merkle.rs +++ b/bin/reth/src/commands/stage/dump/merkle.rs @@ -2,8 +2,9 @@ use super::setup; use crate::utils::DbTool; use eyre::Result; use reth_db::{database::Database, table::TableImporter, tables, DatabaseEnv}; +use reth_node_core::dirs::{ChainPath, DataDirPath}; use reth_node_ethereum::EthEvmConfig; -use reth_primitives::{stage::StageCheckpoint, BlockNumber, ChainSpec, PruneModes}; +use reth_primitives::{stage::StageCheckpoint, BlockNumber, PruneModes}; use reth_provider::ProviderFactory; use reth_stages::{ stages::{ @@ -12,30 +13,46 @@ use reth_stages::{ }, Stage, UnwindInput, }; -use std::{path::PathBuf, sync::Arc}; use tracing::info; pub(crate) async fn dump_merkle_stage( - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, from: BlockNumber, to: BlockNumber, - output_db: &PathBuf, + output_datadir: ChainPath, should_run: bool, ) -> Result<()> { - let (output_db, tip_block_number) = setup(from, to, output_db, db_tool)?; + let (output_db, tip_block_number) = setup(from, to, &output_datadir.db_path(), db_tool)?; output_db.update(|tx| { - tx.import_table_with_range::(&db_tool.db.tx()?, Some(from), to) + tx.import_table_with_range::( + &db_tool.provider_factory.db_ref().tx()?, + Some(from), + to, + ) })??; output_db.update(|tx| { - tx.import_table_with_range::(&db_tool.db.tx()?, Some(from), to) + tx.import_table_with_range::( + &db_tool.provider_factory.db_ref().tx()?, + Some(from), + to, + ) })??; unwind_and_copy(db_tool, (from, to), tip_block_number, &output_db).await?; if should_run { - dry_run(db_tool.chain.clone(), output_db, to, from).await?; + dry_run( + ProviderFactory::new( + output_db, + db_tool.chain.clone(), + output_datadir.static_files_path(), + )?, + to, + from, + ) + .await?; } Ok(()) @@ -43,14 +60,13 @@ pub(crate) async fn dump_merkle_stage( /// Dry-run an unwind to FROM block and copy the necessary table data to the new database. 
async fn unwind_and_copy( - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, range: (u64, u64), tip_block_number: u64, output_db: &DatabaseEnv, ) -> eyre::Result<()> { let (from, to) = range; - let factory = ProviderFactory::new(db_tool.db, db_tool.chain.clone()); - let provider = factory.provider_rw()?; + let provider = db_tool.provider_factory.provider_rw()?; let unwind = UnwindInput { unwind_to: from, @@ -100,10 +116,11 @@ async fn unwind_and_copy( let unwind_inner_tx = provider.into_tx(); // TODO optimize we can actually just get the entries we need - output_db.update(|tx| tx.import_dupsort::(&unwind_inner_tx))??; + output_db + .update(|tx| tx.import_dupsort::(&unwind_inner_tx))??; - output_db.update(|tx| tx.import_table::(&unwind_inner_tx))??; - output_db.update(|tx| tx.import_dupsort::(&unwind_inner_tx))??; + output_db.update(|tx| tx.import_table::(&unwind_inner_tx))??; + output_db.update(|tx| tx.import_dupsort::(&unwind_inner_tx))??; output_db.update(|tx| tx.import_table::(&unwind_inner_tx))??; output_db.update(|tx| tx.import_dupsort::(&unwind_inner_tx))??; @@ -112,14 +129,12 @@ async fn unwind_and_copy( /// Try to re-execute the stage straightaway async fn dry_run( - chain: Arc, - output_db: DB, + output_provider_factory: ProviderFactory, to: u64, from: u64, ) -> eyre::Result<()> { info!(target: "reth::cli", "Executing stage."); - let factory = ProviderFactory::new(&output_db, chain); - let provider = factory.provider_rw()?; + let provider = output_provider_factory.provider_rw()?; let mut stage = MerkleStage::Execution { // Forces updating the root instead of calculating from scratch diff --git a/bin/reth/src/commands/stage/dump/mod.rs b/bin/reth/src/commands/stage/dump/mod.rs index 2a34e67ee621..f85ce0e8b399 100644 --- a/bin/reth/src/commands/stage/dump/mod.rs +++ b/bin/reth/src/commands/stage/dump/mod.rs @@ -14,7 +14,9 @@ use reth_db::{ cursor::DbCursorRO, database::Database, init_db, table::TableImporter, tables, transaction::DbTx, DatabaseEnv, }; +use reth_node_core::dirs::PlatformPath; use reth_primitives::ChainSpec; +use reth_provider::ProviderFactory; use std::{path::PathBuf, sync::Arc}; use tracing::info; @@ -79,9 +81,9 @@ pub enum Stages { /// Stage command that takes a range #[derive(Debug, Clone, Parser)] pub struct StageCommand { - /// The path to the new database folder. + /// The path to the new datadir folder. #[arg(long, value_name = "OUTPUT_PATH", verbatim_doc_comment)] - output_db: PathBuf, + output_datadir: PlatformPath, /// From which block. #[arg(long, short)] @@ -104,22 +106,53 @@ impl Command { info!(target: "reth::cli", path = ?db_path, "Opening database"); let db = Arc::new(init_db(db_path, DatabaseArguments::default().log_level(self.db.log_level))?); + let provider_factory = + ProviderFactory::new(db, self.chain.clone(), data_dir.static_files_path())?; + info!(target: "reth::cli", "Database opened"); - let tool = DbTool::new(&db, self.chain.clone())?; + let tool = DbTool::new(provider_factory, self.chain.clone())?; match &self.command { - Stages::Execution(StageCommand { output_db, from, to, dry_run, .. }) => { - dump_execution_stage(&tool, *from, *to, output_db, *dry_run).await? + Stages::Execution(StageCommand { output_datadir, from, to, dry_run, .. }) => { + dump_execution_stage( + &tool, + *from, + *to, + output_datadir.with_chain(self.chain.chain), + *dry_run, + ) + .await? } - Stages::StorageHashing(StageCommand { output_db, from, to, dry_run, .. }) => { - dump_hashing_storage_stage(&tool, *from, *to, output_db, *dry_run).await? 
+ Stages::StorageHashing(StageCommand { output_datadir, from, to, dry_run, .. }) => { + dump_hashing_storage_stage( + &tool, + *from, + *to, + output_datadir.with_chain(self.chain.chain), + *dry_run, + ) + .await? } - Stages::AccountHashing(StageCommand { output_db, from, to, dry_run, .. }) => { - dump_hashing_account_stage(&tool, *from, *to, output_db, *dry_run).await? + Stages::AccountHashing(StageCommand { output_datadir, from, to, dry_run, .. }) => { + dump_hashing_account_stage( + &tool, + *from, + *to, + output_datadir.with_chain(self.chain.chain), + *dry_run, + ) + .await? } - Stages::Merkle(StageCommand { output_db, from, to, dry_run, .. }) => { - dump_merkle_stage(&tool, *from, *to, output_db, *dry_run).await? + Stages::Merkle(StageCommand { output_datadir, from, to, dry_run, .. }) => { + dump_merkle_stage( + &tool, + *from, + *to, + output_datadir.with_chain(self.chain.chain), + *dry_run, + ) + .await? } } @@ -133,24 +166,27 @@ pub(crate) fn setup( from: u64, to: u64, output_db: &PathBuf, - db_tool: &DbTool<'_, DB>, + db_tool: &DbTool, ) -> eyre::Result<(DatabaseEnv, u64)> { assert!(from < to, "FROM block should be bigger than TO block."); info!(target: "reth::cli", ?output_db, "Creating separate db"); - let output_db = init_db(output_db, Default::default())?; + let output_datadir = init_db(output_db, Default::default())?; - output_db.update(|tx| { + output_datadir.update(|tx| { tx.import_table_with_range::( - &db_tool.db.tx()?, + &db_tool.provider_factory.db_ref().tx()?, Some(from - 1), to + 1, ) })??; - let (tip_block_number, _) = - db_tool.db.view(|tx| tx.cursor_read::()?.last())??.expect("some"); + let (tip_block_number, _) = db_tool + .provider_factory + .db_ref() + .view(|tx| tx.cursor_read::()?.last())?? + .expect("some"); - Ok((output_db, tip_block_number)) + Ok((output_datadir, tip_block_number)) } diff --git a/bin/reth/src/commands/stage/run.rs b/bin/reth/src/commands/stage/run.rs index 5e2a6552bf54..5cd3ded0ba86 100644 --- a/bin/reth/src/commands/stage/run.rs +++ b/bin/reth/src/commands/stage/run.rs @@ -19,16 +19,16 @@ use reth_db::{init_db, mdbx::DatabaseArguments}; use reth_downloaders::bodies::bodies::BodiesDownloaderBuilder; use reth_node_ethereum::EthEvmConfig; use reth_primitives::ChainSpec; -use reth_provider::{ProviderFactory, StageCheckpointReader}; +use reth_provider::{ProviderFactory, StageCheckpointReader, StageCheckpointWriter}; use reth_stages::{ stages::{ AccountHashingStage, BodyStage, ExecutionStage, ExecutionStageThresholds, IndexAccountHistoryStage, IndexStorageHistoryStage, MerkleStage, SenderRecoveryStage, StorageHashingStage, TransactionLookupStage, }, - ExecInput, Stage, StageExt, UnwindInput, + ExecInput, ExecOutput, Stage, StageExt, UnwindInput, UnwindOutput, }; -use std::{any::Any, net::SocketAddr, path::PathBuf, sync::Arc}; +use std::{any::Any, net::SocketAddr, path::PathBuf, sync::Arc, time::Instant}; use tracing::*; /// `reth stage` command @@ -103,6 +103,10 @@ pub struct Command { // e.g. query the DB size, or any table data. 
#[arg(long, short)] commit: bool, + + /// Save stage checkpoints + #[arg(long)] + checkpoints: bool, } impl Command { @@ -127,7 +131,11 @@ impl Command { Arc::new(init_db(db_path, DatabaseArguments::default().log_level(self.db.log_level))?); info!(target: "reth::cli", "Database opened"); - let factory = ProviderFactory::new(Arc::clone(&db), self.chain.clone()); + let factory = ProviderFactory::new( + Arc::clone(&db), + self.chain.clone(), + data_dir.static_files_path(), + )?; let mut provider_rw = factory.provider_rw()?; if let Some(listen_addr) = self.metrics { @@ -165,8 +173,11 @@ impl Command { let default_peers_path = data_dir.known_peers_path(); - let provider_factory = - Arc::new(ProviderFactory::new(db.clone(), self.chain.clone())); + let provider_factory = Arc::new(ProviderFactory::new( + db.clone(), + self.chain.clone(), + data_dir.static_files_path(), + )?); let network = self .network @@ -250,8 +261,12 @@ impl Command { if !self.skip_unwind { while unwind.checkpoint.block_number > self.from { - let unwind_output = unwind_stage.unwind(&provider_rw, unwind)?; - unwind.checkpoint = unwind_output.checkpoint; + let UnwindOutput { checkpoint } = unwind_stage.unwind(&provider_rw, unwind)?; + unwind.checkpoint = checkpoint; + + if self.checkpoints { + provider_rw.save_stage_checkpoint(unwind_stage.id(), checkpoint)?; + } if self.commit { provider_rw.commit()?; @@ -265,21 +280,27 @@ impl Command { checkpoint: Some(checkpoint.with_block_number(self.from)), }; + let start = Instant::now(); + info!(target: "reth::cli", stage = %self.stage, "Executing stage"); loop { exec_stage.execute_ready(input).await?; - let output = exec_stage.execute(&provider_rw, input)?; + let ExecOutput { checkpoint, done } = exec_stage.execute(&provider_rw, input)?; - input.checkpoint = Some(output.checkpoint); + input.checkpoint = Some(checkpoint); + if self.checkpoints { + provider_rw.save_stage_checkpoint(exec_stage.id(), checkpoint)?; + } if self.commit { provider_rw.commit()?; provider_rw = factory.provider_rw()?; } - if output.done { + if done { break } } + info!(target: "reth::cli", stage = %self.stage, time = ?start.elapsed(), "Finished stage"); Ok(()) } diff --git a/bin/reth/src/commands/stage/unwind.rs b/bin/reth/src/commands/stage/unwind.rs index a5b568e4f8a9..44d4c2845fa1 100644 --- a/bin/reth/src/commands/stage/unwind.rs +++ b/bin/reth/src/commands/stage/unwind.rs @@ -68,7 +68,7 @@ impl Command { eyre::bail!("Cannot unwind genesis block") } - let factory = ProviderFactory::new(&db, self.chain.clone()); + let factory = ProviderFactory::new(&db, self.chain.clone(), data_dir.static_files_path())?; let provider = factory.provider_rw()?; let blocks_and_execution = provider diff --git a/bin/reth/src/commands/test_vectors/tables.rs b/bin/reth/src/commands/test_vectors/tables.rs index 15e0c0394dba..6399c81ac235 100644 --- a/bin/reth/src/commands/test_vectors/tables.rs +++ b/bin/reth/src/commands/test_vectors/tables.rs @@ -58,12 +58,12 @@ pub(crate) fn generate_vectors(mut tables: Vec) -> Result<()> { generate!([ (CanonicalHeaders, PER_TABLE, TABLE), - (HeaderTD, PER_TABLE, TABLE), + (HeaderTerminalDifficulties, PER_TABLE, TABLE), (HeaderNumbers, PER_TABLE, TABLE), (Headers, PER_TABLE, TABLE), (BlockBodyIndices, PER_TABLE, TABLE), (BlockOmmers, 100, TABLE), - (TxHashNumber, PER_TABLE, TABLE), + (TransactionHashNumbers, PER_TABLE, TABLE), (Transactions, 100, TABLE), (PlainStorageState, PER_TABLE, DUPSORT), (PlainAccountState, PER_TABLE, TABLE) diff --git a/bin/reth/src/utils.rs b/bin/reth/src/utils.rs index 
1e95f5a53ba9..5c56476a84ea 100644 --- a/bin/reth/src/utils.rs +++ b/bin/reth/src/utils.rs @@ -10,6 +10,7 @@ use reth_db::{ DatabaseError, RawTable, TableRawRow, }; use reth_primitives::{fs, ChainSpec}; +use reth_provider::ProviderFactory; use std::{path::Path, rc::Rc, sync::Arc}; use tracing::info; @@ -24,17 +25,17 @@ pub use reth_node_core::utils::*; /// Wrapper over DB that implements many useful DB queries. #[derive(Debug)] -pub struct DbTool<'a, DB: Database> { - /// The database that the db tool will use. - pub db: &'a DB, +pub struct DbTool<DB: Database> { + /// The provider factory that the db tool will use. + pub provider_factory: ProviderFactory<DB>, /// The [ChainSpec] that the db tool will use. pub chain: Arc<ChainSpec>, } -impl<'a, DB: Database> DbTool<'a, DB> { +impl<DB: Database> DbTool<DB> { /// Takes a DB where the tables have already been created. - pub fn new(db: &'a DB, chain: Arc<ChainSpec>) -> eyre::Result<Self> { - Ok(Self { db, chain }) + pub fn new(provider_factory: ProviderFactory<DB>, chain: Arc<ChainSpec>) -> eyre::Result<Self> { + Ok(Self { provider_factory, chain }) } /// Grabs the contents of the table within a certain index range and places the @@ -50,7 +51,7 @@ impl<'a, DB: Database> DbTool<'a, DB> { let mut hits = 0; - let data = self.db.view(|tx| { + let data = self.provider_factory.db_ref().view(|tx| { let mut cursor = tx.cursor_read::<RawTable<T>>().expect("Was not able to obtain a cursor."); @@ -118,27 +119,38 @@ impl<'a, DB: Database> DbTool<'a, DB> { /// Grabs the content of the table for the given key pub fn get<T: Table>(&self, key: T::Key) -> Result<Option<T::Value>> { - self.db.view(|tx| tx.get::<T>(key))?.map_err(|e| eyre::eyre!(e)) + self.provider_factory.db_ref().view(|tx| tx.get::<T>(key))?.map_err(|e| eyre::eyre!(e)) } /// Grabs the content of the DupSort table for the given key and subkey pub fn get_dup<T: DupSort>(&self, key: T::Key, subkey: T::SubKey) -> Result<Option<T::Value>> { - self.db + self.provider_factory + .db_ref() .view(|tx| tx.cursor_dup_read::<T>()?.seek_by_key_subkey(key, subkey))? .map_err(|e| eyre::eyre!(e)) } - /// Drops the database at the given path. - pub fn drop(&mut self, path: impl AsRef<Path>) -> Result<()> { - let path = path.as_ref(); - info!(target: "reth::cli", "Dropping database at {:?}", path); - fs::remove_dir_all(path)?; + /// Drops the database and the static files at the given path. + pub fn drop( + &mut self, + db_path: impl AsRef<Path>, + static_files_path: impl AsRef<Path>, + ) -> Result<()> { + let db_path = db_path.as_ref(); + info!(target: "reth::cli", "Dropping database at {:?}", db_path); + fs::remove_dir_all(db_path)?; + + let static_files_path = static_files_path.as_ref(); + info!(target: "reth::cli", "Dropping static files at {:?}", static_files_path); + fs::remove_dir_all(static_files_path)?; + fs::create_dir_all(static_files_path)?; + + Ok(()) } /// Drops the provided table from the database.
pub fn drop_table(&mut self) -> Result<()> { - self.db.update(|tx| tx.clear::())??; + self.provider_factory.db_ref().update(|tx| tx.clear::())??; Ok(()) } } diff --git a/book/SUMMARY.md b/book/SUMMARY.md index 1422cf1b706f..ffd5f67e0bda 100644 --- a/book/SUMMARY.md +++ b/book/SUMMARY.md @@ -37,9 +37,13 @@ - [`reth db list`](./cli/reth/db/list.md) - [`reth db diff`](./cli/reth/db/diff.md) - [`reth db get`](./cli/reth/db/get.md) + - [`reth db get mdbx`](./cli/reth/db/get/mdbx.md) + - [`reth db get static-file`](./cli/reth/db/get/static-file.md) - [`reth db drop`](./cli/reth/db/drop.md) - [`reth db clear`](./cli/reth/db/clear.md) - - [`reth db snapshot`](./cli/reth/db/snapshot.md) + - [`reth db clear mdbx`](./cli/reth/db/clear/mdbx.md) + - [`reth db clear static-file`](./cli/reth/db/clear/static-file.md) + - [`reth db create-static-files`](./cli/reth/db/create-static-files.md) - [`reth db version`](./cli/reth/db/version.md) - [`reth db path`](./cli/reth/db/path.md) - [`reth stage`](./cli/reth/stage.md) diff --git a/book/cli/SUMMARY.md b/book/cli/SUMMARY.md index 898ddeb86817..07711434e31f 100644 --- a/book/cli/SUMMARY.md +++ b/book/cli/SUMMARY.md @@ -8,9 +8,13 @@ - [`reth db list`](./reth/db/list.md) - [`reth db diff`](./reth/db/diff.md) - [`reth db get`](./reth/db/get.md) + - [`reth db get mdbx`](./reth/db/get/mdbx.md) + - [`reth db get static-file`](./reth/db/get/static-file.md) - [`reth db drop`](./reth/db/drop.md) - [`reth db clear`](./reth/db/clear.md) - - [`reth db snapshot`](./reth/db/snapshot.md) + - [`reth db clear mdbx`](./reth/db/clear/mdbx.md) + - [`reth db clear static-file`](./reth/db/clear/static-file.md) + - [`reth db create-static-files`](./reth/db/create-static-files.md) - [`reth db version`](./reth/db/version.md) - [`reth db path`](./reth/db/path.md) - [`reth stage`](./reth/stage.md) diff --git a/book/cli/reth/db.md b/book/cli/reth/db.md index 158b4b726a4a..7c72730ae16c 100644 --- a/book/cli/reth/db.md +++ b/book/cli/reth/db.md @@ -7,16 +7,16 @@ $ reth db --help Usage: reth db [OPTIONS] Commands: - stats Lists all the tables, their entry count and their size - list Lists the contents of a table - diff Create a diff between two database tables or two entire databases - get Gets the content of a table for the given key - drop Deletes all database entries - clear Deletes all table entries - snapshot Snapshots tables from database - version Lists current and local database versions - path Returns the full database path - help Print this message or the help of the given subcommand(s) + stats Lists all the tables, their entry count and their size + list Lists the contents of a table + diff Create a diff between two database tables or two entire databases + get Gets the content of a table for the given key + drop Deletes all database entries + clear Deletes all table entries + create-static-files Creates static files from database tables + version Lists current and local database versions + path Returns the full database path + help Print this message or the help of the given subcommand(s) Options: --datadir diff --git a/book/cli/reth/db/clear.md b/book/cli/reth/db/clear.md index c8dd59583fe1..f69e29b60622 100644 --- a/book/cli/reth/db/clear.md +++ b/book/cli/reth/db/clear.md @@ -4,11 +4,12 @@ Deletes all table entries ```bash $ reth db clear --help -Usage: reth db clear [OPTIONS] +Usage: reth db clear [OPTIONS] -Arguments: -
- Table name +Commands: + mdbx Deletes all database table entries + static-file Deletes all static file segment entries + help Print this message or the help of the given subcommand(s) Options: --datadir diff --git a/book/cli/reth/db/clear/mdbx.md b/book/cli/reth/db/clear/mdbx.md new file mode 100644 index 000000000000..e16697d395a4 --- /dev/null +++ b/book/cli/reth/db/clear/mdbx.md @@ -0,0 +1,124 @@ +# reth db clear mdbx + +Deletes all database table entries + +```bash +$ reth db clear mdbx --help +Usage: reth db clear mdbx [OPTIONS]
+ +Arguments: + <TABLE>
+ + +Options: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, goerli, holesky, dev + + [default: mainnet] + + --instance + Add a new instance of a node. + + Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine. + + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. + + Changes to the following port numbers: - DISCOVERY_PORT: default + `instance` - 1 - AUTH_PORT: default + `instance` * 100 - 100 - HTTP_RPC_PORT: default - `instance` + 1 - WS_RPC_PORT: default + `instance` * 2 - 2 + + [default: 1] + + -h, --help + Print help (see a summary with '-h') + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) 
+ + -q, --quiet + Silence all log output +``` \ No newline at end of file diff --git a/book/cli/reth/db/clear/static-file.md b/book/cli/reth/db/clear/static-file.md new file mode 100644 index 000000000000..c41158b7af5b --- /dev/null +++ b/book/cli/reth/db/clear/static-file.md @@ -0,0 +1,127 @@ +# reth db clear static-file + +Deletes all static file segment entries + +```bash +$ reth db clear static-file --help +Usage: reth db clear static-file [OPTIONS] + +Arguments: + + Possible values: + - headers: Static File segment responsible for the `CanonicalHeaders`, `Headers`, `HeaderTerminalDifficulties` tables + - transactions: Static File segment responsible for the `Transactions` table + - receipts: Static File segment responsible for the `Receipts` table + +Options: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, goerli, holesky, dev + + [default: mainnet] + + --instance + Add a new instance of a node. + + Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine. + + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. + + Changes to the following port numbers: - DISCOVERY_PORT: default + `instance` - 1 - AUTH_PORT: default + `instance` * 100 - 100 - HTTP_RPC_PORT: default - `instance` + 1 - WS_RPC_PORT: default + `instance` * 2 - 2 + + [default: 1] + + -h, --help + Print help (see a summary with '-h') + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. 
If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) + + -q, --quiet + Silence all log output +``` \ No newline at end of file diff --git a/book/cli/reth/db/clear/static_file.md b/book/cli/reth/db/clear/static_file.md new file mode 100644 index 000000000000..2c503dd714ac --- /dev/null +++ b/book/cli/reth/db/clear/static_file.md @@ -0,0 +1,127 @@ +# reth db clear static-file + +Deletes all static_file segment entries + +```bash +$ reth db clear static-file --help +Usage: reth db clear static-file [OPTIONS] + +Arguments: + + Possible values: + - headers: StaticFile segment responsible for the `CanonicalHeaders`, `Headers`, `HeaderTerminalDifficulties` tables + - transactions: StaticFile segment responsible for the `Transactions` table + - receipts: StaticFile segment responsible for the `Receipts` table + +Options: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, goerli, holesky, dev + + [default: mainnet] + + --instance + Add a new instance of a node. + + Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine. + + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. + + Changes to the following port numbers: - DISCOVERY_PORT: default + `instance` - 1 - AUTH_PORT: default + `instance` * 100 - 100 - HTTP_RPC_PORT: default - `instance` + 1 - WS_RPC_PORT: default + `instance` * 2 - 2 + + [default: 1] + + -h, --help + Print help (see a summary with '-h') + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. 
This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) + + -q, --quiet + Silence all log output +``` \ No newline at end of file diff --git a/book/cli/reth/db/snapshot.md b/book/cli/reth/db/create-static-files.md similarity index 83% rename from book/cli/reth/db/snapshot.md rename to book/cli/reth/db/create-static-files.md index 8e4aef50126b..01094f925d81 100644 --- a/book/cli/reth/db/snapshot.md +++ b/book/cli/reth/db/create-static-files.md @@ -1,19 +1,19 @@ -# reth db snapshot +# reth db create-static-files -Snapshots tables from database +Creates static files from database tables ```bash -$ reth db snapshot --help -Usage: reth db snapshot [OPTIONS] [SEGMENTS]... +$ reth db create-static-files --help +Usage: reth db create-static-files [OPTIONS] [SEGMENTS]... Arguments: [SEGMENTS]... - Snapshot segments to generate + Static File segments to generate Possible values: - - headers: Snapshot segment responsible for the `CanonicalHeaders`, `Headers`, `HeaderTD` tables - - transactions: Snapshot segment responsible for the `Transactions` table - - receipts: Snapshot segment responsible for the `Receipts` table + - headers: Static File segment responsible for the `CanonicalHeaders`, `Headers`, `HeaderTerminalDifficulties` tables + - transactions: Static File segment responsible for the `Transactions` table + - receipts: Static File segment responsible for the `Receipts` table Options: --datadir @@ -28,12 +28,12 @@ Options: [default: default] -f, --from - Starting block for the snapshot + Starting block for the static file [default: 0] -b, --block-interval - Number of blocks in the snapshot + Number of blocks in the static file [default: 500000] @@ -47,18 +47,18 @@ Options: [default: mainnet] -p, --parallel - Sets the number of snapshots built in parallel. Note: Each parallel build is memory-intensive + Sets the number of static files built in parallel. 
Note: Each parallel build is memory-intensive [default: 1] --only-stats - Flag to skip snapshot creation and print snapshot files stats + Flag to skip static file creation and print static files stats --bench - Flag to enable database-to-snapshot benchmarking + Flag to enable database-to-static file benchmarking --only-bench - Flag to skip snapshot creation and only run benchmarks on existing snapshots + Flag to skip static file creation and only run benchmarks on existing static files -c, --compression Compression algorithms to use @@ -69,7 +69,7 @@ Options: - lz4: LZ4 compression algorithm - zstd: Zstandard (Zstd) compression algorithm - zstd-with-dictionary: Zstandard (Zstd) compression algorithm with a dictionary - - uncompressed: No compression, uncompressed snapshot + - uncompressed: No compression --with-filters Flag to enable inclusion list filters and PHFs diff --git a/book/cli/reth/db/get.md b/book/cli/reth/db/get.md index 17b155eaaae3..de2f83b56c37 100644 --- a/book/cli/reth/db/get.md +++ b/book/cli/reth/db/get.md @@ -4,19 +4,12 @@ Gets the content of a table for the given key ```bash $ reth db get --help -Usage: reth db get [OPTIONS]
<TABLE> <KEY> [SUBKEY] +Usage: reth db get [OPTIONS] <COMMAND> -Arguments: - <TABLE>
- The table name - - NOTE: The dupsort tables are not supported now. - - - The key to get content for - - [SUBKEY] - The subkey to get content for +Commands: + mdbx Gets the content of a database table for the given key + static-file Gets the content of a static file segment for the given key + help Print this message or the help of the given subcommand(s) Options: --datadir @@ -30,9 +23,6 @@ Options: [default: default] - --raw - Output bytes instead of human-readable decoded value - --chain The chain this node is running. Possible values are either a built-in chain or the path to a chain specification file. diff --git a/book/cli/reth/db/get/mdbx.md b/book/cli/reth/db/get/mdbx.md new file mode 100644 index 000000000000..bf6f0749463d --- /dev/null +++ b/book/cli/reth/db/get/mdbx.md @@ -0,0 +1,133 @@ +# reth db get mdbx + +Gets the content of a database table for the given key + +```bash +$ reth db get mdbx --help +Usage: reth db get mdbx [OPTIONS]
<TABLE> <KEY> [SUBKEY] + +Arguments: + <TABLE>
+ + + + The key to get content for + + [SUBKEY] + The subkey to get content for + +Options: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + --raw + Output bytes instead of human-readable decoded value + + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, goerli, holesky, dev + + [default: mainnet] + + --instance + Add a new instance of a node. + + Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine. + + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. + + Changes to the following port numbers: - DISCOVERY_PORT: default + `instance` - 1 - AUTH_PORT: default + `instance` * 100 - 100 - HTTP_RPC_PORT: default - `instance` + 1 - WS_RPC_PORT: default + `instance` * 2 - 2 + + [default: 1] + + -h, --help + Print help (see a summary with '-h') + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) 
+ + -q, --quiet + Silence all log output +``` \ No newline at end of file diff --git a/book/cli/reth/db/get/static-file.md b/book/cli/reth/db/get/static-file.md new file mode 100644 index 000000000000..a6addeffb8f3 --- /dev/null +++ b/book/cli/reth/db/get/static-file.md @@ -0,0 +1,133 @@ +# reth db get static-file + +Gets the content of a static file segment for the given key + +```bash +$ reth db get static-file --help +Usage: reth db get static-file [OPTIONS] + +Arguments: + + Possible values: + - headers: Static File segment responsible for the `CanonicalHeaders`, `Headers`, `HeaderTerminalDifficulties` tables + - transactions: Static File segment responsible for the `Transactions` table + - receipts: Static File segment responsible for the `Receipts` table + + + The key to get content for + +Options: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + --raw + Output bytes instead of human-readable decoded value + + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, goerli, holesky, dev + + [default: mainnet] + + --instance + Add a new instance of a node. + + Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine. + + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. + + Changes to the following port numbers: - DISCOVERY_PORT: default + `instance` - 1 - AUTH_PORT: default + `instance` * 100 - 100 - HTTP_RPC_PORT: default - `instance` + 1 - WS_RPC_PORT: default + `instance` * 2 - 2 + + [default: 1] + + -h, --help + Print help (see a summary with '-h') + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. 
If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) + + -q, --quiet + Silence all log output +``` \ No newline at end of file diff --git a/book/cli/reth/db/get/static_file.md b/book/cli/reth/db/get/static_file.md new file mode 100644 index 000000000000..12df536f22bb --- /dev/null +++ b/book/cli/reth/db/get/static_file.md @@ -0,0 +1,133 @@ +# reth db get static-file + +Gets the content of a static_file segment for the given key + +```bash +$ reth db get static-file --help +Usage: reth db get static-file [OPTIONS] + +Arguments: + + Possible values: + - headers: StaticFile segment responsible for the `CanonicalHeaders`, `Headers`, `HeaderTerminalDifficulties` tables + - transactions: StaticFile segment responsible for the `Transactions` table + - receipts: StaticFile segment responsible for the `Receipts` table + + + The key to get content for + +Options: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + --raw + Output bytes instead of human-readable decoded value + + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, goerli, holesky, dev + + [default: mainnet] + + --instance + Add a new instance of a node. + + Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine. + + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. + + Changes to the following port numbers: - DISCOVERY_PORT: default + `instance` - 1 - AUTH_PORT: default + `instance` * 100 - 100 - HTTP_RPC_PORT: default - `instance` + 1 - WS_RPC_PORT: default + `instance` * 2 - 2 + + [default: 1] + + -h, --help + Print help (see a summary with '-h') + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. 
This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) + + -q, --quiet + Silence all log output +``` \ No newline at end of file diff --git a/book/cli/reth/db/static_file.md b/book/cli/reth/db/static_file.md new file mode 100644 index 000000000000..a6f965075bdf --- /dev/null +++ b/book/cli/reth/db/static_file.md @@ -0,0 +1,174 @@ +# reth db static-file + +StaticFiles tables from database + +```bash +$ reth db static-file --help +Usage: reth db static-file [OPTIONS] [SEGMENTS]... + +Arguments: + [SEGMENTS]... + StaticFile segments to generate + + Possible values: + - headers: StaticFile segment responsible for the `CanonicalHeaders`, `Headers`, `HeaderTerminalDifficulties` tables + - transactions: StaticFile segment responsible for the `Transactions` table + - receipts: StaticFile segment responsible for the `Receipts` table + +Options: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + -f, --from + Starting block for the static_file + + [default: 0] + + -b, --block-interval + Number of blocks in the static_file + + [default: 500000] + + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, goerli, holesky, dev + + [default: mainnet] + + -p, --parallel + Sets the number of static files built in parallel. 
Note: Each parallel build is memory-intensive + + [default: 1] + + --only-stats + Flag to skip static_file creation and print static_file files stats + + --bench + Flag to enable database-to-static_file benchmarking + + --only-bench + Flag to skip static_file creation and only run benchmarks on existing static files + + -c, --compression + Compression algorithms to use + + [default: uncompressed] + + Possible values: + - lz4: LZ4 compression algorithm + - zstd: Zstandard (Zstd) compression algorithm + - zstd-with-dictionary: Zstandard (Zstd) compression algorithm with a dictionary + - uncompressed: No compression, uncompressed static_file + + --with-filters + Flag to enable inclusion list filters and PHFs + + --phf + Specifies the perfect hashing function to use + + Possible values: + - fmph: Fingerprint-Based Minimal Perfect Hash Function + - go-fmph: Fingerprint-Based Minimal Perfect Hash Function with Group Optimization + + --instance + Add a new instance of a node. + + Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine. + + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. + + Changes to the following port numbers: - DISCOVERY_PORT: default + `instance` - 1 - AUTH_PORT: default + `instance` * 100 - 100 - HTTP_RPC_PORT: default - `instance` + 1 - WS_RPC_PORT: default + `instance` * 2 - 2 + + [default: 1] + + -h, --help + Print help (see a summary with '-h') + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) 
+ + -q, --quiet + Silence all log output +``` diff --git a/book/cli/reth/db/stats.md b/book/cli/reth/db/stats.md index 8fa61d1a8079..dea5e3d058c0 100644 --- a/book/cli/reth/db/stats.md +++ b/book/cli/reth/db/stats.md @@ -18,6 +18,9 @@ Options: [default: default] + --only-total-size + Show only the total size for static files + --chain The chain this node is running. Possible values are either a built-in chain or the path to a chain specification file. @@ -27,6 +30,9 @@ Options: [default: mainnet] + --summary + Show only the summary per static file segment + --instance Add a new instance of a node. diff --git a/book/cli/reth/node.md b/book/cli/reth/node.md index 655861ba43a9..80072c1ab479 100644 --- a/book/cli/reth/node.md +++ b/book/cli/reth/node.md @@ -131,14 +131,14 @@ Networking: --pooled-tx-response-soft-limit Soft limit for the byte size of a [`PooledTransactions`](reth_eth_wire::PooledTransactions) response on assembling a [`GetPooledTransactions`](reth_eth_wire::GetPooledTransactions) request. Spec'd at 2 MiB. - + . - + [default: 2097152] --pooled-tx-pack-soft-limit Default soft limit for the byte size of a [`PooledTransactions`](reth_eth_wire::PooledTransactions) response on assembling a [`GetPooledTransactions`](reth_eth_wire::PooledTransactions) request. This defaults to less than the [`SOFT_LIMIT_BYTE_SIZE_POOLED_TRANSACTIONS_RESPONSE`], at 2 MiB, used when assembling a [`PooledTransactions`](reth_eth_wire::PooledTransactions) response. Default is 128 KiB - + [default: 131072] RPC: @@ -533,4 +533,3 @@ Display: -q, --quiet Silence all log output -``` \ No newline at end of file diff --git a/book/cli/reth/stage/drop.md b/book/cli/reth/stage/drop.md index d6c912b4cc29..88a0197990d3 100644 --- a/book/cli/reth/stage/drop.md +++ b/book/cli/reth/stage/drop.md @@ -57,18 +57,17 @@ Database: Possible values: - - headers: The headers stage within the pipeline - - bodies: The bodies stage within the pipeline - - senders: The senders stage within the pipeline - - execution: The execution stage within the pipeline - - account-hashing: The account hashing stage within the pipeline - - storage-hashing: The storage hashing stage within the pipeline - - hashing: The hashing stage within the pipeline - - merkle: The Merkle stage within the pipeline - - tx-lookup: The transaction lookup stage within the pipeline - - account-history: The account history stage within the pipeline - - storage-history: The storage history stage within the pipeline - - total-difficulty: The total difficulty stage within the pipeline + - headers: The headers stage within the pipeline + - bodies: The bodies stage within the pipeline + - senders: The senders stage within the pipeline + - execution: The execution stage within the pipeline + - account-hashing: The account hashing stage within the pipeline + - storage-hashing: The storage hashing stage within the pipeline + - hashing: The hashing stage within the pipeline + - merkle: The Merkle stage within the pipeline + - tx-lookup: The transaction lookup stage within the pipeline + - account-history: The account history stage within the pipeline + - storage-history: The storage history stage within the pipeline Logging: --log.stdout.format diff --git a/book/cli/reth/stage/dump/account-hashing.md b/book/cli/reth/stage/dump/account-hashing.md index 4a575388c329..c8b6069fad5b 100644 --- a/book/cli/reth/stage/dump/account-hashing.md +++ b/book/cli/reth/stage/dump/account-hashing.md @@ -4,11 +4,11 @@ AccountHashing stage ```bash $ reth stage dump account-hashing --help -Usage: 
reth stage dump account-hashing [OPTIONS] --output-db --from --to +Usage: reth stage dump account-hashing [OPTIONS] --output-datadir --from --to Options: - --output-db - The path to the new database folder. + --output-datadir + The path to the new datadir folder. -f, --from From which block diff --git a/book/cli/reth/stage/dump/execution.md b/book/cli/reth/stage/dump/execution.md index 4d6b0ce5d8e2..8ff064a70cc2 100644 --- a/book/cli/reth/stage/dump/execution.md +++ b/book/cli/reth/stage/dump/execution.md @@ -4,11 +4,11 @@ Execution stage ```bash $ reth stage dump execution --help -Usage: reth stage dump execution [OPTIONS] --output-db --from --to +Usage: reth stage dump execution [OPTIONS] --output-datadir --from --to Options: - --output-db - The path to the new database folder. + --output-datadir + The path to the new datadir folder. -f, --from From which block diff --git a/book/cli/reth/stage/dump/merkle.md b/book/cli/reth/stage/dump/merkle.md index c5c04b683710..ec5d142c4728 100644 --- a/book/cli/reth/stage/dump/merkle.md +++ b/book/cli/reth/stage/dump/merkle.md @@ -4,11 +4,11 @@ Merkle stage ```bash $ reth stage dump merkle --help -Usage: reth stage dump merkle [OPTIONS] --output-db --from --to +Usage: reth stage dump merkle [OPTIONS] --output-datadir --from --to Options: - --output-db - The path to the new database folder. + --output-datadir + The path to the new datadir folder. -f, --from From which block diff --git a/book/cli/reth/stage/dump/storage-hashing.md b/book/cli/reth/stage/dump/storage-hashing.md index 9223b445b4d3..6a45c5d1ab94 100644 --- a/book/cli/reth/stage/dump/storage-hashing.md +++ b/book/cli/reth/stage/dump/storage-hashing.md @@ -4,11 +4,11 @@ StorageHashing stage ```bash $ reth stage dump storage-hashing --help -Usage: reth stage dump storage-hashing [OPTIONS] --output-db --from --to +Usage: reth stage dump storage-hashing [OPTIONS] --output-datadir --from --to Options: - --output-db - The path to the new database folder. + --output-datadir + The path to the new datadir folder. 
-f, --from From which block diff --git a/book/cli/reth/stage/run.md b/book/cli/reth/stage/run.md index c27b0f457baf..a7d8d61bcd22 100644 --- a/book/cli/reth/stage/run.md +++ b/book/cli/reth/stage/run.md @@ -11,18 +11,17 @@ Arguments: The name of the stage to run Possible values: - - headers: The headers stage within the pipeline - - bodies: The bodies stage within the pipeline - - senders: The senders stage within the pipeline - - execution: The execution stage within the pipeline - - account-hashing: The account hashing stage within the pipeline - - storage-hashing: The storage hashing stage within the pipeline - - hashing: The hashing stage within the pipeline - - merkle: The Merkle stage within the pipeline - - tx-lookup: The transaction lookup stage within the pipeline - - account-history: The account history stage within the pipeline - - storage-history: The storage history stage within the pipeline - - total-difficulty: The total difficulty stage within the pipeline + - headers: The headers stage within the pipeline + - bodies: The bodies stage within the pipeline + - senders: The senders stage within the pipeline + - execution: The execution stage within the pipeline + - account-hashing: The account hashing stage within the pipeline + - storage-hashing: The storage hashing stage within the pipeline + - hashing: The hashing stage within the pipeline + - merkle: The Merkle stage within the pipeline + - tx-lookup: The transaction lookup stage within the pipeline + - account-history: The account history stage within the pipeline + - storage-history: The storage history stage within the pipeline Options: --config @@ -152,6 +151,18 @@ Networking: --max-inbound-peers Maximum number of inbound requests. default: 30 + --pooled-tx-response-soft-limit + Soft limit for the byte size of a [`PooledTransactions`](reth_eth_wire::PooledTransactions) response on assembling a [`GetPooledTransactions`](reth_eth_wire::GetPooledTransactions) request. Spec'd at 2 MiB. + + . + + [default: 2097152] + + --pooled-tx-pack-soft-limit + Default soft limit for the byte size of a [`PooledTransactions`](reth_eth_wire::PooledTransactions) response on assembling a [`GetPooledTransactions`](reth_eth_wire::PooledTransactions) request. This defaults to less than the [`SOFT_LIMIT_BYTE_SIZE_POOLED_TRANSACTIONS_RESPONSE`], at 2 MiB, used when assembling a [`PooledTransactions`](reth_eth_wire::PooledTransactions) response. Default is 128 KiB + + [default: 131072] + Database: --db.log-level Database logging level. Levels higher than "notice" require a debug build diff --git a/book/run/config.md b/book/run/config.md index d5889c6a22ac..9c770dae73e0 100644 --- a/book/run/config.md +++ b/book/run/config.md @@ -12,7 +12,6 @@ The configuration file contains the following sections: - [`[stages]`](#the-stages-section) -- Configuration of the individual sync stages - [`headers`](#headers) - - [`total_difficulty`](#total_difficulty) - [`bodies`](#bodies) - [`sender_recovery`](#sender_recovery) - [`execution`](#execution) @@ -64,20 +63,6 @@ downloader_request_limit = 1000 commit_threshold = 10000 ``` -### `total_difficulty` - -The total difficulty stage calculates the total difficulty reached for each header in the chain. - -```toml -[stages.total_difficulty] -# The amount of headers to calculate the total difficulty for -# before writing the results to disk. 
-# -# Lower thresholds correspond to more frequent disk I/O (writes), -# but lowers memory usage -commit_threshold = 100000 -``` - ### `bodies` The bodies section controls both the behavior of the bodies stage, which download historical block bodies, as well as the primary downloader that fetches block bodies over P2P. @@ -207,7 +192,7 @@ The transaction lookup stage builds an index of transaction hashes to their sequ # # Lower thresholds correspond to more frequent disk I/O (writes), # but lowers memory usage -commit_threshold = 5000000 +chunk_size = 5000000 ``` ### `index_account_history` diff --git a/book/run/observability.md b/book/run/observability.md index 4ab2951805e4..39d485e1f362 100644 --- a/book/run/observability.md +++ b/book/run/observability.md @@ -12,7 +12,7 @@ Now, as the node is running, you can `curl` the endpoint you provided to the `-- curl 127.0.0.1:9001 ``` -The response from this is quite descriptive, but it can be a bit verbose. Plus, it's just a snapshot of the metrics at the time that you `curl`ed the endpoint. +The response from this is quite descriptive, but it can be a bit verbose. Plus, it's just a static_file of the metrics at the time that you `curl`ed the endpoint. You can run the following command in a separate terminal to periodically poll the endpoint, and just print the values (without the header text) to the terminal: diff --git a/book/run/pruning.md b/book/run/pruning.md index acfb93c9cb09..6800b7f5fa17 100644 --- a/book/run/pruning.md +++ b/book/run/pruning.md @@ -2,7 +2,7 @@ > Pruning and full node are new features of Reth, > and we will be happy to hear about your experience using them either -> on [GitHub](https://github.com/paradigmxyz/reth/issues) or in the [Telegram group](https://t.me/paradigm_reth). +> on [GitHub](https://github.com/paradigmxyz/reth/issues) or in the [Telegram group](https://t.me/paradigm_reth). By default, Reth runs as an archive node. Such nodes have all historical blocks and the state at each of these blocks available for querying and tracing. @@ -14,14 +14,15 @@ the steps for running Reth as a full node, what caveats to expect and how to con - Archive node – Reth node that has all historical data from genesis. - Pruned node – Reth node that has its historical data pruned partially or fully through -a [custom configuration](./config.md#the-prune-section). + a [custom configuration](./config.md#the-prune-section). - Full Node – Reth node that has the latest state and historical data for only the last 10064 blocks available -for querying in the same way as an archive node. + for querying in the same way as an archive node. The node type that was chosen when first [running a node](./run-a-node.md) **can not** be changed after the initial sync. Turning Archive into Pruned, or Pruned into Full is not supported. ## Modes + ### Archive Node Default mode, follow the steps from the previous chapter on [how to run on mainnet or official testnets](./mainnet.md). @@ -36,6 +37,7 @@ the previous chapter on [how to run on mainnet or official testnets](./mainnet.m To run Reth as a full node, follow the steps from the previous chapter on [how to run on mainnet or official testnets](./mainnet.md), and add a `--full` flag. For example: + ```bash RUST_LOG=info reth node \ --full \ @@ -61,7 +63,7 @@ Different segments take up different amounts of disk space. 
If pruned fully, this is the total freed space you'll get, per segment: | Segment | Size | -|--------------------|-------| +| ------------------ | ----- | | Sender Recovery | 75GB | | Transaction Lookup | 150GB | | Receipts | 250GB | @@ -73,6 +75,7 @@ If pruned fully, this is the total freed space you'll get, per segment: Full node occupies at least 950GB. Essentially, the full node is the same as following configuration for the pruned node: + ```toml [prune] block_interval = 5 @@ -91,15 +94,18 @@ storage_history = { distance = 10_064 } ``` Meaning, it prunes: + - Account History and Storage History up to the last 10064 blocks - All of Sender Recovery data. The caveat is that it's pruned gradually after the initial sync -is completed, so the disk space is reclaimed slowly. + is completed, so the disk space is reclaimed slowly. - Receipts up to the last 10064 blocks, preserving all receipts with the logs from Beacon Deposit Contract Given the aforementioned segment sizes, we get the following full node size: + ```text -Archive Node - Receipts - AccountHistory - StorageHistory = Full Node +Archive Node - Receipts - AccountsHistory - StoragesHistory = Full Node ``` + ```text 2.14TB - 250GB - 240GB - 700GB = 950GB ``` @@ -108,6 +114,7 @@ Archive Node - Receipts - AccountHistory - StorageHistory = Full Node As it was mentioned in the [pruning configuration chapter](./config.md#the-prune-section), there are several segments which can be pruned independently of each other: + - Sender Recovery - Transaction Lookup - Receipts @@ -121,11 +128,10 @@ become unavailable. The following tables describe RPC methods available in the full node. - #### `debug` namespace | RPC | Note | -|----------------------------|------------------------------------------------------------| +| -------------------------- | ---------------------------------------------------------- | | `debug_getRawBlock` | | | `debug_getRawHeader` | | | `debug_getRawReceipts` | Only for the last 10064 blocks and Beacon Deposit Contract | @@ -137,11 +143,10 @@ The following tables describe RPC methods available in the full node. | `debug_traceCallMany` | Only for the last 10064 blocks | | `debug_traceTransaction` | Only for the last 10064 blocks | - #### `eth` namespace | RPC / Segment | Note | -|-------------------------------------------|------------------------------------------------------------| +| ----------------------------------------- | ---------------------------------------------------------- | | `eth_accounts` | | | `eth_blockNumber` | | | `eth_call` | Only for the last 10064 blocks | @@ -189,7 +194,7 @@ The following tables describe RPC methods available in the full node. #### `net` namespace | RPC / Segment | -|-----------------| +| --------------- | | `net_listening` | | `net_peerCount` | | `net_version` | @@ -197,7 +202,7 @@ The following tables describe RPC methods available in the full node. #### `trace` namespace | RPC / Segment | Note | -|---------------------------------|--------------------------------| +| ------------------------------- | ------------------------------ | | `trace_block` | Only for the last 10064 blocks | | `trace_call` | Only for the last 10064 blocks | | `trace_callMany` | Only for the last 10064 blocks | @@ -210,109 +215,108 @@ The following tables describe RPC methods available in the full node. 
#### `txpool` namespace | RPC / Segment | -|----------------------| +| -------------------- | | `txpool_content` | | `txpool_contentFrom` | | `txpool_inspect` | | `txpool_status` | - ### Pruned Node The following tables describe the requirements for prune segments, per RPC method: + - ✅ – if the segment is pruned, the RPC method still works - ❌ - if the segment is pruned, the RPC method doesn't work anymore #### `debug` namespace | RPC / Segment | Sender Recovery | Transaction Lookup | Receipts | Account History | Storage History | -|----------------------------|-----------------|--------------------|----------|-----------------|-----------------| -| `debug_getRawBlock` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `debug_getRawHeader` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `debug_getRawReceipts` | ✅ | ✅ | ❌ | ✅ | ✅ | -| `debug_getRawTransaction` | ✅ | ❌ | ✅ | ✅ | ✅ | -| `debug_traceBlock` | ✅ | ✅ | ✅ | ❌ | ❌ | -| `debug_traceBlockByHash` | ✅ | ✅ | ✅ | ❌ | ❌ | -| `debug_traceBlockByNumber` | ✅ | ✅ | ✅ | ❌ | ❌ | -| `debug_traceCall` | ✅ | ✅ | ✅ | ❌ | ❌ | -| `debug_traceCallMany` | ✅ | ✅ | ✅ | ❌ | ❌ | -| `debug_traceTransaction` | ✅ | ✅ | ✅ | ❌ | ❌ | - +| -------------------------- | --------------- | ------------------ | -------- | --------------- | --------------- | +| `debug_getRawBlock` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `debug_getRawHeader` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `debug_getRawReceipts` | ✅ | ✅ | ❌ | ✅ | ✅ | +| `debug_getRawTransaction` | ✅ | ❌ | ✅ | ✅ | ✅ | +| `debug_traceBlock` | ✅ | ✅ | ✅ | ❌ | ❌ | +| `debug_traceBlockByHash` | ✅ | ✅ | ✅ | ❌ | ❌ | +| `debug_traceBlockByNumber` | ✅ | ✅ | ✅ | ❌ | ❌ | +| `debug_traceCall` | ✅ | ✅ | ✅ | ❌ | ❌ | +| `debug_traceCallMany` | ✅ | ✅ | ✅ | ❌ | ❌ | +| `debug_traceTransaction` | ✅ | ✅ | ✅ | ❌ | ❌ | #### `eth` namespace | RPC / Segment | Sender Recovery | Transaction Lookup | Receipts | Account History | Storage History | -|-------------------------------------------|-----------------|--------------------|----------|-----------------|-----------------| -| `eth_accounts` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_blockNumber` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_call` | ✅ | ✅ | ✅ | ❌ | ❌ | -| `eth_chainId` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_createAccessList` | ✅ | ✅ | ✅ | ❌ | ❌ | -| `eth_estimateGas` | ✅ | ✅ | ✅ | ❌ | ❌ | -| `eth_feeHistory` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_gasPrice` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_getBalance` | ✅ | ✅ | ✅ | ❌ | ✅ | -| `eth_getBlockByHash` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_getBlockByNumber` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_getBlockReceipts` | ✅ | ✅ | ❌ | ✅ | ✅ | -| `eth_getBlockTransactionCountByHash` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_getBlockTransactionCountByNumber` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_getCode` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_getFilterChanges` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_getFilterLogs` | ✅ | ✅ | ❌ | ✅ | ✅ | -| `eth_getLogs` | ✅ | ✅ | ❌ | ✅ | ✅ | -| `eth_getStorageAt` | ✅ | ✅ | ✅ | ✅ | ❌ | -| `eth_getTransactionByBlockHashAndIndex` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_getTransactionByBlockNumberAndIndex` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_getTransactionByHash` | ✅ | ❌ | ✅ | ✅ | ✅ | -| `eth_getTransactionCount` | ✅ | ✅ | ✅ | ❌ | ✅ | -| `eth_getTransactionReceipt` | ✅ | ❌ | ❌ | ✅ | ✅ | -| `eth_getUncleByBlockHashAndIndex` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_getUncleByBlockNumberAndIndex` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_getUncleCountByBlockHash` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_getUncleCountByBlockNumber` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_maxPriorityFeePerGas` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_mining` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_newBlockFilter` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_newFilter` | ✅ | ✅ | ✅ | ✅ | ✅ | -| 
`eth_newPendingTransactionFilter` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_protocolVersion` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_sendRawTransaction` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_sendTransaction` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_sign` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_signTransaction` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_signTypedData` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_subscribe` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_syncing` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_uninstallFilter` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `eth_unsubscribe` | ✅ | ✅ | ✅ | ✅ | ✅ | +| ----------------------------------------- | --------------- | ------------------ | -------- | --------------- | --------------- | +| `eth_accounts` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_blockNumber` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_call` | ✅ | ✅ | ✅ | ❌ | ❌ | +| `eth_chainId` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_createAccessList` | ✅ | ✅ | ✅ | ❌ | ❌ | +| `eth_estimateGas` | ✅ | ✅ | ✅ | ❌ | ❌ | +| `eth_feeHistory` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_gasPrice` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_getBalance` | ✅ | ✅ | ✅ | ❌ | ✅ | +| `eth_getBlockByHash` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_getBlockByNumber` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_getBlockReceipts` | ✅ | ✅ | ❌ | ✅ | ✅ | +| `eth_getBlockTransactionCountByHash` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_getBlockTransactionCountByNumber` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_getCode` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_getFilterChanges` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_getFilterLogs` | ✅ | ✅ | ❌ | ✅ | ✅ | +| `eth_getLogs` | ✅ | ✅ | ❌ | ✅ | ✅ | +| `eth_getStorageAt` | ✅ | ✅ | ✅ | ✅ | ❌ | +| `eth_getTransactionByBlockHashAndIndex` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_getTransactionByBlockNumberAndIndex` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_getTransactionByHash` | ✅ | ❌ | ✅ | ✅ | ✅ | +| `eth_getTransactionCount` | ✅ | ✅ | ✅ | ❌ | ✅ | +| `eth_getTransactionReceipt` | ✅ | ❌ | ❌ | ✅ | ✅ | +| `eth_getUncleByBlockHashAndIndex` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_getUncleByBlockNumberAndIndex` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_getUncleCountByBlockHash` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_getUncleCountByBlockNumber` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_maxPriorityFeePerGas` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_mining` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_newBlockFilter` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_newFilter` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_newPendingTransactionFilter` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_protocolVersion` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_sendRawTransaction` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_sendTransaction` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_sign` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_signTransaction` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_signTypedData` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_subscribe` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_syncing` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_uninstallFilter` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `eth_unsubscribe` | ✅ | ✅ | ✅ | ✅ | ✅ | #### `net` namespace | RPC / Segment | Sender Recovery | Transaction Lookup | Receipts | Account History | Storage History | -|-----------------|-----------------|--------------------|----------|-----------------|-----------------| -| `net_listening` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `net_peerCount` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `net_version` | ✅ | ✅ | ✅ | ✅ | ✅ | +| --------------- | --------------- | ------------------ | -------- | --------------- | --------------- | +| `net_listening` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `net_peerCount` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `net_version` | ✅ | ✅ | ✅ | ✅ | ✅ | #### `trace` namespace | RPC / Segment | Sender Recovery | Transaction Lookup | Receipts | Account History | Storage History | -|---------------------------------|-----------------|--------------------|----------|-----------------|-----------------| -| `trace_block` | ✅ | ✅ | ✅ | ❌ | ❌ 
| -| `trace_call` | ✅ | ✅ | ✅ | ❌ | ❌ | -| `trace_callMany` | ✅ | ✅ | ✅ | ❌ | ❌ | -| `trace_get` | ✅ | ❌ | ✅ | ❌ | ❌ | -| `trace_rawTransaction` | ✅ | ✅ | ✅ | ❌ | ❌ | -| `trace_replayBlockTransactions` | ✅ | ✅ | ✅ | ❌ | ❌ | -| `trace_replayTransaction` | ✅ | ❌ | ✅ | ❌ | ❌ | -| `trace_transaction` | ✅ | ❌ | ✅ | ❌ | ❌ | +| ------------------------------- | --------------- | ------------------ | -------- | --------------- | --------------- | +| `trace_block` | ✅ | ✅ | ✅ | ❌ | ❌ | +| `trace_call` | ✅ | ✅ | ✅ | ❌ | ❌ | +| `trace_callMany` | ✅ | ✅ | ✅ | ❌ | ❌ | +| `trace_get` | ✅ | ❌ | ✅ | ❌ | ❌ | +| `trace_rawTransaction` | ✅ | ✅ | ✅ | ❌ | ❌ | +| `trace_replayBlockTransactions` | ✅ | ✅ | ✅ | ❌ | ❌ | +| `trace_replayTransaction` | ✅ | ❌ | ✅ | ❌ | ❌ | +| `trace_transaction` | ✅ | ❌ | ✅ | ❌ | ❌ | #### `txpool` namespace | RPC / Segment | Sender Recovery | Transaction Lookup | Receipts | Account History | Storage History | -|----------------------|-----------------|--------------------|----------|-----------------|-----------------| -| `txpool_content` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `txpool_contentFrom` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `txpool_inspect` | ✅ | ✅ | ✅ | ✅ | ✅ | -| `txpool_status` | ✅ | ✅ | ✅ | ✅ | ✅ | +| -------------------- | --------------- | ------------------ | -------- | --------------- | --------------- | +| `txpool_content` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `txpool_contentFrom` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `txpool_inspect` | ✅ | ✅ | ✅ | ✅ | ✅ | +| `txpool_status` | ✅ | ✅ | ✅ | ✅ | ✅ | diff --git a/crates/blockchain-tree/src/blockchain_tree.rs b/crates/blockchain-tree/src/blockchain_tree.rs index 9a03e470ac28..72ef5dbfc01e 100644 --- a/crates/blockchain-tree/src/blockchain_tree.rs +++ b/crates/blockchain-tree/src/blockchain_tree.rs @@ -1294,7 +1294,7 @@ mod tests { let provider = factory.provider_rw().unwrap(); provider - .insert_block( + .insert_historical_block( genesis.try_seal_with_senders().expect("invalid tx signature in genesis"), None, ) @@ -1309,7 +1309,7 @@ mod tests { } provider .tx_ref() - .put::("Finish".to_string(), StageCheckpoint::new(10)) + .put::("Finish".to_string(), StageCheckpoint::new(10)) .unwrap(); provider.commit().unwrap(); } @@ -1423,7 +1423,7 @@ mod tests { .unwrap(); let account = Account { balance: initial_signer_balance, ..Default::default() }; provider_rw.tx_ref().put::(signer, account).unwrap(); - provider_rw.tx_ref().put::(keccak256(signer), account).unwrap(); + provider_rw.tx_ref().put::(keccak256(signer), account).unwrap(); provider_rw.commit().unwrap(); } diff --git a/crates/blockchain-tree/src/externals.rs b/crates/blockchain-tree/src/externals.rs index 150a09c66773..5a288271e76e 100644 --- a/crates/blockchain-tree/src/externals.rs +++ b/crates/blockchain-tree/src/externals.rs @@ -1,9 +1,11 @@ //! Blockchain tree externals. -use reth_db::{cursor::DbCursorRO, database::Database, tables, transaction::DbTx}; +use reth_db::{ + cursor::DbCursorRO, database::Database, static_file::HeaderMask, tables, transaction::DbTx, +}; use reth_interfaces::{consensus::Consensus, RethResult}; -use reth_primitives::{BlockHash, BlockNumber}; -use reth_provider::ProviderFactory; +use reth_primitives::{BlockHash, BlockNumber, StaticFileSegment}; +use reth_provider::{ProviderFactory, StatsReader}; use std::{collections::BTreeMap, sync::Arc}; /// A container for external components. @@ -44,13 +46,39 @@ impl TreeExternals { &self, num_hashes: usize, ) -> RethResult> { - Ok(self + // Fetch the latest canonical hashes from the database + let mut hashes = self .provider_factory .provider()? 
.tx_ref() .cursor_read::()? .walk_back(None)? .take(num_hashes) - .collect::, _>>()?) + .collect::, _>>()?; + + // Fetch the same number of latest canonical hashes from the static_files and merge them + // with the database hashes. It is needed due to the fact that we're writing + // directly to static_files in pipeline sync, but to the database in live sync, + // which means that the latest canonical hashes in the static file might be more recent + // than in the database, and vice versa, or even some ranges of the latest + // `num_hashes` blocks may be in database, and some ranges in static_files. + let static_file_provider = self.provider_factory.static_file_provider(); + let total_headers = static_file_provider.count_entries::()? as u64; + if total_headers > 0 { + let range = + total_headers.saturating_sub(1).saturating_sub(num_hashes as u64)..total_headers; + + hashes.extend(range.clone().zip(static_file_provider.fetch_range_with_predicate( + StaticFileSegment::Headers, + range, + |cursor, number| cursor.get_one::>(number.into()), + |_| true, + )?)); + } + + // We may have fetched more than `num_hashes` hashes, so we need to truncate the result to + // the requested number. + let hashes = hashes.into_iter().rev().take(num_hashes).collect(); + Ok(hashes) } } diff --git a/crates/config/src/config.rs b/crates/config/src/config.rs index 99401bd1e52a..cc3741fc5c15 100644 --- a/crates/config/src/config.rs +++ b/crates/config/src/config.rs @@ -52,8 +52,6 @@ impl Config { pub struct StageConfig { /// Header stage configuration. pub headers: HeadersConfig, - /// Total Difficulty stage configuration - pub total_difficulty: TotalDifficultyConfig, /// Body stage configuration. pub bodies: BodiesConfig, /// Sender Recovery stage configuration. @@ -107,21 +105,6 @@ impl Default for HeadersConfig { } } -/// Total difficulty stage configuration -#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Serialize)] -#[serde(default)] -pub struct TotalDifficultyConfig { - /// The maximum number of total difficulty entries to sum up before committing progress to the - /// database. - pub commit_threshold: u64, -} - -impl Default for TotalDifficultyConfig { - fn default() -> Self { - Self { commit_threshold: 100_000 } - } -} - /// Body stage configuration. #[derive(Debug, Clone, Copy, Deserialize, PartialEq, Serialize)] #[serde(default)] @@ -242,13 +225,13 @@ impl Default for MerkleConfig { #[derive(Debug, Clone, Copy, Deserialize, PartialEq, Serialize)] #[serde(default)] pub struct TransactionLookupConfig { - /// The maximum number of transactions to process before committing progress to the database. - pub commit_threshold: u64, + /// The maximum number of transactions to process before writing to disk. 
+ pub chunk_size: u64, } impl Default for TransactionLookupConfig { fn default() -> Self { - Self { commit_threshold: 5_000_000 } + Self { chunk_size: 5_000_000 } } } @@ -359,9 +342,6 @@ downloader_max_buffered_responses = 100 downloader_request_limit = 1000 commit_threshold = 10000 -[stages.total_difficulty] -commit_threshold = 100000 - [stages.bodies] downloader_request_limit = 200 downloader_stream_batch_size = 1000 @@ -388,7 +368,7 @@ commit_threshold = 100000 clean_threshold = 50000 [stages.transaction_lookup] -commit_threshold = 5000000 +chunk_size = 5000000 [stages.index_account_history] commit_threshold = 100000 diff --git a/crates/consensus/beacon/Cargo.toml b/crates/consensus/beacon/Cargo.toml index 9672b98ca8ed..a6407240ba09 100644 --- a/crates/consensus/beacon/Cargo.toml +++ b/crates/consensus/beacon/Cargo.toml @@ -23,7 +23,7 @@ reth-tasks.workspace = true reth-payload-builder.workspace = true reth-payload-validator.workspace = true reth-prune.workspace = true -reth-snapshot.workspace = true +reth-static-file.workspace = true reth-tokio-util.workspace = true reth-node-api.workspace = true @@ -59,6 +59,7 @@ reth-node-ethereum.workspace = true reth-node-optimism.workspace = true assert_matches.workspace = true +tempfile.workspace = true [features] optimism = [ diff --git a/crates/consensus/beacon/src/engine/hooks/controller.rs b/crates/consensus/beacon/src/engine/hooks/controller.rs index 77e3ed2701c7..33ae74c83e9b 100644 --- a/crates/consensus/beacon/src/engine/hooks/controller.rs +++ b/crates/consensus/beacon/src/engine/hooks/controller.rs @@ -10,7 +10,6 @@ use tracing::debug; #[derive(Debug)] pub(crate) struct PolledHook { - #[allow(dead_code)] pub(crate) name: &'static str, pub(crate) event: EngineHookEvent, pub(crate) db_access_level: EngineHookDBAccessLevel, @@ -151,6 +150,8 @@ impl EngineHooksController { ); return Poll::Ready(Ok(result)) + } else { + debug!(target: "consensus::engine::hooks", hook = hook.name(), "Next hook is not ready"); } Poll::Pending diff --git a/crates/consensus/beacon/src/engine/hooks/mod.rs b/crates/consensus/beacon/src/engine/hooks/mod.rs index 69101db1f698..ce149717ada0 100644 --- a/crates/consensus/beacon/src/engine/hooks/mod.rs +++ b/crates/consensus/beacon/src/engine/hooks/mod.rs @@ -11,8 +11,8 @@ pub(crate) use controller::{EngineHooksController, PolledHook}; mod prune; pub use prune::PruneHook; -mod snapshot; -pub use snapshot::SnapshotHook; +mod static_file; +pub use static_file::StaticFileHook; /// Collection of [engine hooks][`EngineHook`]. #[derive(Default)] diff --git a/crates/consensus/beacon/src/engine/hooks/snapshot.rs b/crates/consensus/beacon/src/engine/hooks/snapshot.rs deleted file mode 100644 index 780812818e4d..000000000000 --- a/crates/consensus/beacon/src/engine/hooks/snapshot.rs +++ /dev/null @@ -1,156 +0,0 @@ -//! Snapshot hook for the engine implementation. - -use crate::{ - engine::hooks::{EngineContext, EngineHook, EngineHookError, EngineHookEvent}, - hooks::EngineHookDBAccessLevel, -}; -use futures::FutureExt; -use reth_db::database::Database; -use reth_interfaces::{RethError, RethResult}; -use reth_primitives::BlockNumber; -use reth_snapshot::{Snapshotter, SnapshotterError, SnapshotterWithResult}; -use reth_tasks::TaskSpawner; -use std::task::{ready, Context, Poll}; -use tokio::sync::oneshot; - -/// Manages snapshotting under the control of the engine. -/// -/// This type controls the [Snapshotter]. -#[derive(Debug)] -pub struct SnapshotHook { - /// The current state of the snapshotter. 
- state: SnapshotterState, - /// The type that can spawn the snapshotter task. - task_spawner: Box, -} - -impl SnapshotHook { - /// Create a new instance - pub fn new(snapshotter: Snapshotter, task_spawner: Box) -> Self { - Self { state: SnapshotterState::Idle(Some(snapshotter)), task_spawner } - } - - /// Advances the snapshotter state. - /// - /// This checks for the result in the channel, or returns pending if the snapshotter is idle. - fn poll_snapshotter(&mut self, cx: &mut Context<'_>) -> Poll> { - let result = match self.state { - SnapshotterState::Idle(_) => return Poll::Pending, - SnapshotterState::Running(ref mut fut) => { - ready!(fut.poll_unpin(cx)) - } - }; - - let event = match result { - Ok((snapshotter, result)) => { - self.state = SnapshotterState::Idle(Some(snapshotter)); - - match result { - Ok(_) => EngineHookEvent::Finished(Ok(())), - Err(err) => EngineHookEvent::Finished(Err(err.into())), - } - } - Err(_) => { - // failed to receive the snapshotter - EngineHookEvent::Finished(Err(EngineHookError::ChannelClosed)) - } - }; - - Poll::Ready(Ok(event)) - } - - /// This will try to spawn the snapshotter if it is idle: - /// 1. Check if snapshotting is needed through [Snapshotter::get_snapshot_targets] and then - /// [SnapshotTargets::any](reth_snapshot::SnapshotTargets::any). - /// 2. - /// 1. If snapshotting is needed, pass snapshot request to the [Snapshotter::run] and spawn - /// it in a separate task. Set snapshotter state to [SnapshotterState::Running]. - /// 2. If snapshotting is not needed, set snapshotter state back to - /// [SnapshotterState::Idle]. - /// - /// If snapshotter is already running, do nothing. - fn try_spawn_snapshotter( - &mut self, - finalized_block_number: BlockNumber, - ) -> RethResult> { - Ok(match &mut self.state { - SnapshotterState::Idle(snapshotter) => { - let Some(mut snapshotter) = snapshotter.take() else { return Ok(None) }; - - let targets = snapshotter.get_snapshot_targets(finalized_block_number)?; - - // Check if the snapshotting of any data has been requested. - if targets.any() { - let (tx, rx) = oneshot::channel(); - self.task_spawner.spawn_critical_blocking( - "snapshotter task", - Box::pin(async move { - let result = snapshotter.run(targets); - let _ = tx.send((snapshotter, result)); - }), - ); - self.state = SnapshotterState::Running(rx); - - Some(EngineHookEvent::Started) - } else { - self.state = SnapshotterState::Idle(Some(snapshotter)); - Some(EngineHookEvent::NotReady) - } - } - SnapshotterState::Running(_) => None, - }) - } -} - -impl EngineHook for SnapshotHook { - fn name(&self) -> &'static str { - "Snapshot" - } - - fn poll( - &mut self, - cx: &mut Context<'_>, - ctx: EngineContext, - ) -> Poll> { - let Some(finalized_block_number) = ctx.finalized_block_number else { - return Poll::Ready(Ok(EngineHookEvent::NotReady)) - }; - - // Try to spawn a snapshotter - match self.try_spawn_snapshotter(finalized_block_number)? { - Some(EngineHookEvent::NotReady) => return Poll::Pending, - Some(event) => return Poll::Ready(Ok(event)), - None => (), - } - - // Poll snapshotter and check its status - self.poll_snapshotter(cx) - } - - fn db_access_level(&self) -> EngineHookDBAccessLevel { - EngineHookDBAccessLevel::ReadOnly - } -} - -/// The possible snapshotter states within the sync controller. -/// -/// [SnapshotterState::Idle] means that the snapshotter is currently idle. -/// [SnapshotterState::Running] means that the snapshotter is currently running. -#[derive(Debug)] -enum SnapshotterState { - /// Snapshotter is idle. 
- Idle(Option>), - /// Snapshotter is running and waiting for a response - Running(oneshot::Receiver>), -} - -impl From for EngineHookError { - fn from(err: SnapshotterError) -> Self { - match err { - SnapshotterError::InconsistentData(_) => EngineHookError::Internal(Box::new(err)), - SnapshotterError::Interface(err) => err.into(), - SnapshotterError::Database(err) => RethError::Database(err).into(), - SnapshotterError::Provider(err) => RethError::Provider(err).into(), - } - } -} diff --git a/crates/consensus/beacon/src/engine/hooks/static_file.rs b/crates/consensus/beacon/src/engine/hooks/static_file.rs new file mode 100644 index 000000000000..ee4309cc0b72 --- /dev/null +++ b/crates/consensus/beacon/src/engine/hooks/static_file.rs @@ -0,0 +1,163 @@ +//! StaticFile hook for the engine implementation. + +use crate::{ + engine::hooks::{EngineContext, EngineHook, EngineHookError, EngineHookEvent}, + hooks::EngineHookDBAccessLevel, +}; +use futures::FutureExt; +use reth_db::database::Database; +use reth_interfaces::RethResult; +use reth_primitives::{static_file::HighestStaticFiles, BlockNumber}; +use reth_static_file::{StaticFileProducer, StaticFileProducerWithResult}; +use reth_tasks::TaskSpawner; +use std::task::{ready, Context, Poll}; +use tokio::sync::oneshot; +use tracing::trace; + +/// Manages producing static files under the control of the engine. +/// +/// This type controls the [StaticFileProducer]. +#[derive(Debug)] +pub struct StaticFileHook { + /// The current state of the static_file_producer. + state: StaticFileProducerState, + /// The type that can spawn the static_file_producer task. + task_spawner: Box, +} + +impl StaticFileHook { + /// Create a new instance + pub fn new( + static_file_producer: StaticFileProducer, + task_spawner: Box, + ) -> Self { + Self { state: StaticFileProducerState::Idle(Some(static_file_producer)), task_spawner } + } + + /// Advances the static_file_producer state. + /// + /// This checks for the result in the channel, or returns pending if the static_file_producer is + /// idle. + fn poll_static_file_producer( + &mut self, + cx: &mut Context<'_>, + ) -> Poll> { + let result = match self.state { + StaticFileProducerState::Idle(_) => return Poll::Pending, + StaticFileProducerState::Running(ref mut fut) => { + ready!(fut.poll_unpin(cx)) + } + }; + + let event = match result { + Ok((static_file_producer, result)) => { + self.state = StaticFileProducerState::Idle(Some(static_file_producer)); + + match result { + Ok(_) => EngineHookEvent::Finished(Ok(())), + Err(err) => EngineHookEvent::Finished(Err(err.into())), + } + } + Err(_) => { + // failed to receive the static_file_producer + EngineHookEvent::Finished(Err(EngineHookError::ChannelClosed)) + } + }; + + Poll::Ready(Ok(event)) + } + + /// This will try to spawn the static_file_producer if it is idle: + /// 1. Check if producing static files is needed through + /// [StaticFileProducer::get_static_file_targets] and then + /// [StaticFileTargets::any](reth_static_file::StaticFileTargets::any). + /// 2. + /// 1. If producing static files is needed, pass static file request to the + /// [StaticFileProducer::run] and spawn it in a separate task. Set static file producer + /// state to [StaticFileProducerState::Running]. + /// 2. If producing static files is not needed, set static file producer state back to + /// [StaticFileProducerState::Idle]. + /// + /// If static_file_producer is already running, do nothing. 
+ fn try_spawn_static_file_producer( + &mut self, + finalized_block_number: BlockNumber, + ) -> RethResult> { + Ok(match &mut self.state { + StaticFileProducerState::Idle(static_file_producer) => { + let Some(mut static_file_producer) = static_file_producer.take() else { + trace!(target: "consensus::engine::hooks::static_file", "StaticFileProducer is already running but the state is idle"); + return Ok(None); + }; + + let targets = static_file_producer.get_static_file_targets(HighestStaticFiles { + headers: Some(finalized_block_number), + receipts: Some(finalized_block_number), + transactions: Some(finalized_block_number), + })?; + + // Check if the moving data to static files has been requested. + if targets.any() { + let (tx, rx) = oneshot::channel(); + self.task_spawner.spawn_critical_blocking( + "static_file_producer task", + Box::pin(async move { + let result = static_file_producer.run(targets); + let _ = tx.send((static_file_producer, result)); + }), + ); + self.state = StaticFileProducerState::Running(rx); + + Some(EngineHookEvent::Started) + } else { + self.state = StaticFileProducerState::Idle(Some(static_file_producer)); + Some(EngineHookEvent::NotReady) + } + } + StaticFileProducerState::Running(_) => None, + }) + } +} + +impl EngineHook for StaticFileHook { + fn name(&self) -> &'static str { + "StaticFile" + } + + fn poll( + &mut self, + cx: &mut Context<'_>, + ctx: EngineContext, + ) -> Poll> { + let Some(finalized_block_number) = ctx.finalized_block_number else { + trace!(target: "consensus::engine::hooks::static_file", ?ctx, "Finalized block number is not available"); + return Poll::Pending; + }; + + // Try to spawn a static_file_producer + match self.try_spawn_static_file_producer(finalized_block_number)? { + Some(EngineHookEvent::NotReady) => return Poll::Pending, + Some(event) => return Poll::Ready(Ok(event)), + None => (), + } + + // Poll static_file_producer and check its status + self.poll_static_file_producer(cx) + } + + fn db_access_level(&self) -> EngineHookDBAccessLevel { + EngineHookDBAccessLevel::ReadOnly + } +} + +/// The possible static_file_producer states within the sync controller. +/// +/// [StaticFileProducerState::Idle] means that the static file producer is currently idle. +/// [StaticFileProducerState::Running] means that the static file producer is currently running. +#[derive(Debug)] +enum StaticFileProducerState { + /// [StaticFileProducer] is idle. + Idle(Option>), + /// [StaticFileProducer] is running and waiting for a response + Running(oneshot::Receiver>), +} diff --git a/crates/consensus/beacon/src/engine/mod.rs b/crates/consensus/beacon/src/engine/mod.rs index 1cec504b7cee..5ae78a7ab5dc 100644 --- a/crates/consensus/beacon/src/engine/mod.rs +++ b/crates/consensus/beacon/src/engine/mod.rs @@ -361,6 +361,9 @@ where warn!( target: "consensus::engine", hook = %hook.name(), + head_block_hash = ?state.head_block_hash, + safe_block_hash = ?state.safe_block_hash, + finalized_block_hash = ?state.finalized_block_hash, "Hook is in progress, skipping forkchoice update. \ This may affect the performance of your node as a validator." 
); @@ -1502,7 +1505,9 @@ where debug!(target: "consensus::engine", hash=?new_head.hash(), number=new_head.number, "Canonicalized new head"); // we can update the FCU blocks - let _ = self.update_canon_chain(new_head, &target); + if let Err(err) = self.update_canon_chain(new_head, &target) { + debug!(target: "consensus::engine", ?err, ?target, "Failed to update the canonical chain tracker"); + } // we're no longer syncing self.sync_state_updater.update_sync_state(SyncState::Idle); @@ -1704,9 +1709,18 @@ where None } - fn on_hook_result(&self, result: PolledHook) -> Result<(), BeaconConsensusEngineError> { - if result.db_access_level.is_read_write() { - match result.event { + fn on_hook_result(&self, polled_hook: PolledHook) -> Result<(), BeaconConsensusEngineError> { + if let EngineHookEvent::Finished(Err(error)) = &polled_hook.event { + error!( + target: "consensus::engine", + name = %polled_hook.name, + ?error, + "Hook finished with error" + ) + } + + if polled_hook.db_access_level.is_read_write() { + match polled_hook.event { EngineHookEvent::NotReady => {} EngineHookEvent::Started => { // If the hook has read-write access to the database, it means that the engine @@ -1889,9 +1903,7 @@ mod tests { }; use assert_matches::assert_matches; use reth_interfaces::test_utils::generators::{self, Rng}; - use reth_primitives::{ - stage::StageCheckpoint, ChainSpec, ChainSpecBuilder, B256, MAINNET, U256, - }; + use reth_primitives::{stage::StageCheckpoint, ChainSpecBuilder, MAINNET}; use reth_provider::{BlockWriter, ProviderFactory}; use reth_rpc_types::engine::{ForkchoiceState, ForkchoiceUpdated, PayloadStatus}; use reth_rpc_types_compat::engine::payload::try_block_to_payload_v1; @@ -2064,12 +2076,10 @@ mod tests { } fn insert_blocks<'a, DB: Database>( - db: DB, - chain: Arc, + provider_factory: ProviderFactory, mut blocks: impl Iterator, ) { - let factory = ProviderFactory::new(db, chain); - let provider = factory.provider_rw().unwrap(); + let provider = provider_factory.provider_rw().unwrap(); blocks .try_for_each(|b| { provider @@ -2085,8 +2095,9 @@ mod tests { mod fork_choice_updated { use super::*; - use reth_db::{tables, transaction::DbTxMut}; + use reth_db::{tables, test_utils::create_test_static_files_dir, transaction::DbTxMut}; use reth_interfaces::test_utils::generators::random_block; + use reth_primitives::U256; use reth_rpc_types::engine::ForkchoiceUpdateError; #[tokio::test] @@ -2139,10 +2150,18 @@ mod tests { let genesis = random_block(&mut rng, 0, None, None, Some(0)); let block1 = random_block(&mut rng, 1, Some(genesis.hash()), None, Some(0)); - insert_blocks(env.db.as_ref(), chain_spec.clone(), [&genesis, &block1].into_iter()); + insert_blocks( + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), + [&genesis, &block1].into_iter(), + ); env.db .update(|tx| { - tx.put::( + tx.put::( StageId::Finish.to_string(), StageCheckpoint::new(block1.number), ) @@ -2189,7 +2208,15 @@ mod tests { let genesis = random_block(&mut rng, 0, None, None, Some(0)); let block1 = random_block(&mut rng, 1, Some(genesis.hash()), None, Some(0)); - insert_blocks(env.db.as_ref(), chain_spec.clone(), [&genesis, &block1].into_iter()); + insert_blocks( + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), + [&genesis, &block1].into_iter(), + ); let mut engine_rx = spawn_consensus_engine(consensus_engine); @@ 
-2205,7 +2232,15 @@ mod tests { let invalid_rx = env.send_forkchoice_updated(next_forkchoice_state).await; // Insert next head immediately after sending forkchoice update - insert_blocks(env.db.as_ref(), chain_spec.clone(), [&next_head].into_iter()); + insert_blocks( + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), + [&next_head].into_iter(), + ); let expected_result = ForkchoiceUpdated::from_status(PayloadStatusEnum::Syncing); assert_matches!(invalid_rx, Ok(result) => assert_eq!(result, expected_result)); @@ -2239,7 +2274,15 @@ mod tests { let genesis = random_block(&mut rng, 0, None, None, Some(0)); let block1 = random_block(&mut rng, 1, Some(genesis.hash()), None, Some(0)); - insert_blocks(env.db.as_ref(), chain_spec.clone(), [&genesis, &block1].into_iter()); + insert_blocks( + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), + [&genesis, &block1].into_iter(), + ); let engine = spawn_consensus_engine(consensus_engine); @@ -2287,8 +2330,12 @@ mod tests { block3.header.set_difficulty(U256::from(1)); insert_blocks( - env.db.as_ref(), - chain_spec.clone(), + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), [&genesis, &block1, &block2, &block3].into_iter(), ); @@ -2330,7 +2377,15 @@ mod tests { let genesis = random_block(&mut rng, 0, None, None, Some(0)); let block1 = random_block(&mut rng, 1, Some(genesis.hash()), None, Some(0)); - insert_blocks(env.db.as_ref(), chain_spec.clone(), [&genesis, &block1].into_iter()); + insert_blocks( + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), + [&genesis, &block1].into_iter(), + ); let _engine = spawn_consensus_engine(consensus_engine); @@ -2352,10 +2407,11 @@ mod tests { mod new_payload { use super::*; + use reth_db::test_utils::create_test_static_files_dir; use reth_interfaces::test_utils::generators::random_block; use reth_primitives::{ genesis::{Genesis, GenesisAllocator}, - Hardfork, + Hardfork, U256, }; use reth_provider::test_utils::blocks::BlockChainTestData; @@ -2426,8 +2482,12 @@ mod tests { let block1 = random_block(&mut rng, 1, Some(genesis.hash()), None, Some(0)); let block2 = random_block(&mut rng, 2, Some(block1.hash()), None, Some(0)); insert_blocks( - env.db.as_ref(), - chain_spec.clone(), + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), [&genesis, &block1, &block2].into_iter(), ); @@ -2492,7 +2552,15 @@ mod tests { // TODO: add transactions that transfer from the alloc accounts, generating the new // block tx and state root - insert_blocks(env.db.as_ref(), chain_spec.clone(), [&genesis, &block1].into_iter()); + insert_blocks( + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), + [&genesis, &block1].into_iter(), + ); let mut engine_rx = spawn_consensus_engine(consensus_engine); @@ -2530,7 +2598,15 @@ mod tests { let genesis = random_block(&mut rng, 0, None, None, Some(0)); - insert_blocks(env.db.as_ref(), chain_spec.clone(), [&genesis].into_iter()); + insert_blocks( + ProviderFactory::new( + 
env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), + [&genesis].into_iter(), + ); let mut engine_rx = spawn_consensus_engine(consensus_engine); @@ -2589,8 +2665,12 @@ mod tests { .build(); insert_blocks( - env.db.as_ref(), - chain_spec.clone(), + ProviderFactory::new( + env.db.as_ref(), + chain_spec.clone(), + create_test_static_files_dir(), + ) + .expect("create provider factory with static_files"), [&data.genesis, &block1].into_iter(), ); diff --git a/crates/consensus/beacon/src/engine/sync.rs b/crates/consensus/beacon/src/engine/sync.rs index 6b25643e95a0..c6bd452be5da 100644 --- a/crates/consensus/beacon/src/engine/sync.rs +++ b/crates/consensus/beacon/src/engine/sync.rs @@ -398,13 +398,14 @@ mod tests { use reth_interfaces::{p2p::either::EitherDownloader, test_utils::TestFullBlockClient}; use reth_primitives::{ constants::ETHEREUM_BLOCK_GAS_LIMIT, stage::StageCheckpoint, BlockBody, ChainSpecBuilder, - Header, SealedHeader, MAINNET, + Header, PruneModes, SealedHeader, MAINNET, }; use reth_provider::{ test_utils::{create_test_provider_factory_with_chain_spec, TestExecutorFactory}, BundleStateWithReceipts, }; use reth_stages::{test_utils::TestStages, ExecOutput, StageError}; + use reth_static_file::StaticFileProducer; use reth_tasks::TokioTaskExecutor; use std::{collections::VecDeque, future::poll_fn, ops::Range}; use tokio::sync::watch; @@ -465,7 +466,15 @@ mod tests { pipeline = pipeline.with_max_block(max_block); } - pipeline.build(create_test_provider_factory_with_chain_spec(chain_spec)) + let provider_factory = create_test_provider_factory_with_chain_spec(chain_spec); + + let static_file_producer = StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ); + + pipeline.build(provider_factory, static_file_producer) } } diff --git a/crates/consensus/beacon/src/engine/test_utils.rs b/crates/consensus/beacon/src/engine/test_utils.rs index 3541a84ceedb..e491b1fe33ef 100644 --- a/crates/consensus/beacon/src/engine/test_utils.rs +++ b/crates/consensus/beacon/src/engine/test_utils.rs @@ -6,10 +6,7 @@ use crate::{ use reth_blockchain_tree::{ config::BlockchainTreeConfig, externals::TreeExternals, BlockchainTree, ShareableBlockchainTree, }; -use reth_db::{ - test_utils::{create_test_rw_db, TempDatabase}, - DatabaseEnv as DE, -}; +use reth_db::{test_utils::TempDatabase, DatabaseEnv as DE}; type DatabaseEnv = TempDatabase; use reth_downloaders::{ bodies::bodies::BodiesDownloaderBuilder, @@ -24,10 +21,11 @@ use reth_interfaces::{ }; use reth_node_ethereum::{EthEngineTypes, EthEvmConfig}; use reth_payload_builder::test_utils::spawn_test_payload_service; -use reth_primitives::{BlockNumber, ChainSpec, B256}; +use reth_primitives::{BlockNumber, ChainSpec, PruneModes, B256}; use reth_provider::{ - providers::BlockchainProvider, test_utils::TestExecutorFactory, BundleStateWithReceipts, - ExecutorFactory, HeaderSyncMode, ProviderFactory, PrunableBlockExecutor, + providers::BlockchainProvider, + test_utils::{create_test_provider_factory_with_chain_spec, TestExecutorFactory}, + BundleStateWithReceipts, ExecutorFactory, HeaderSyncMode, PrunableBlockExecutor, }; use reth_prune::Pruner; use reth_revm::EvmProcessorFactory; @@ -35,6 +33,7 @@ use reth_rpc_types::engine::{ CancunPayloadFields, ExecutionPayload, ForkchoiceState, ForkchoiceUpdated, PayloadStatus, }; use reth_stages::{sets::DefaultStages, test_utils::TestStages, ExecOutput, Pipeline, StageError}; +use 
reth_static_file::StaticFileProducer; use reth_tasks::TokioTaskExecutor; use std::{collections::VecDeque, sync::Arc}; use tokio::sync::{oneshot, watch}; @@ -348,9 +347,8 @@ where /// Builds the test consensus engine into a `TestConsensusEngine` and `TestEnv`. pub fn build(self) -> (TestBeaconConsensusEngine, TestEnv>) { reth_tracing::init_test_tracing(); - let db = create_test_rw_db(); let provider_factory = - ProviderFactory::new(db.clone(), self.base_config.chain_spec.clone()); + create_test_provider_factory_with_chain_spec(self.base_config.chain_spec.clone()); let consensus: Arc = match self.base_config.consensus { TestConsensusConfig::Real => { @@ -380,6 +378,12 @@ where )), }; + let static_file_producer = StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ); + // Setup pipeline let (tip_tx, tip_rx) = watch::channel(B256::default()); let mut pipeline = match self.base_config.pipeline_config { @@ -395,14 +399,17 @@ where .build(client.clone(), consensus.clone(), provider_factory.clone()) .into_task(); - Pipeline::builder().add_stages(DefaultStages::new( - ProviderFactory::new(db.clone(), self.base_config.chain_spec.clone()), - HeaderSyncMode::Tip(tip_rx.clone()), - Arc::clone(&consensus), - header_downloader, - body_downloader, - executor_factory.clone(), - )) + Pipeline::builder().add_stages( + DefaultStages::new( + provider_factory.clone(), + HeaderSyncMode::Tip(tip_rx.clone()), + Arc::clone(&consensus), + header_downloader, + body_downloader, + executor_factory.clone(), + ) + .expect("should build"), + ) } }; @@ -410,7 +417,7 @@ where pipeline = pipeline.with_max_block(max_block); } - let pipeline = pipeline.build(provider_factory.clone()); + let pipeline = pipeline.build(provider_factory.clone(), static_file_producer); // Setup blockchain tree let externals = TreeExternals::new(provider_factory.clone(), consensus, executor_factory); @@ -423,12 +430,11 @@ where BlockchainProvider::with_latest(provider_factory.clone(), tree, latest); let pruner = Pruner::new( - provider_factory, + provider_factory.clone(), vec![], 5, self.base_config.chain_spec.prune_delete_limit, config.max_reorg_depth() as usize, - watch::channel(None).1, ); let mut hooks = EngineHooks::new(); @@ -453,7 +459,7 @@ where engine.sync.set_max_block(max_block) } - (engine, TestEnv::new(db, tip_rx, handle)) + (engine, TestEnv::new(provider_factory.db_ref().clone(), tip_rx, handle)) } } diff --git a/crates/consensus/common/src/validation.rs b/crates/consensus/common/src/validation.rs index 6857c4a23084..9c1f523aa31f 100644 --- a/crates/consensus/common/src/validation.rs +++ b/crates/consensus/common/src/validation.rs @@ -439,7 +439,7 @@ mod tests { gas_price: 0x28f000fff, gas_limit: 10, to: TransactionKind::Call(Address::default()), - value: 3_u64.into(), + value: U256::from(3_u64), input: Bytes::from(vec![1, 2]), access_list: Default::default(), }); @@ -461,7 +461,7 @@ mod tests { max_fee_per_blob_gas: 0x7, gas_limit: 10, to: TransactionKind::Call(Address::default()), - value: 3_u64.into(), + value: U256::from(3_u64), input: Bytes::from(vec![1, 2]), access_list: Default::default(), blob_versioned_hashes: std::iter::repeat_with(|| rng.gen()).take(num_blobs).collect(), diff --git a/crates/etl/Cargo.toml b/crates/etl/Cargo.toml new file mode 100644 index 000000000000..07af6c72968f --- /dev/null +++ b/crates/etl/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "reth-etl" +version.workspace = true +edition.workspace = true +rust-version.workspace = true 
+license.workspace = true +homepage.workspace = true +repository.workspace = true +exclude.workspace = true + +[dependencies] +tempfile.workspace = true +reth-db.workspace = true +rayon.workspace = true + +[dev-dependencies] +reth-primitives.workspace = true diff --git a/crates/etl/src/lib.rs b/crates/etl/src/lib.rs new file mode 100644 index 000000000000..0fc865b98d23 --- /dev/null +++ b/crates/etl/src/lib.rs @@ -0,0 +1,264 @@ +//! ETL data collector. +//! +//! This crate is useful for dumping unsorted data into temporary files and iterating on their +//! sorted representation later on. +//! +//! This has multiple uses, such as optimizing database inserts (for Btree based databases) and +//! memory management (as it moves the buffer to disk instead of memory). + +#![doc( + html_logo_url = "https://raw.githubusercontent.com/paradigmxyz/reth/main/assets/reth-docs.png", + html_favicon_url = "https://avatars0.githubusercontent.com/u/97369466?s=256", + issue_tracker_base_url = "https://github.com/paradigmxyz/reth/issues/" +)] +#![warn(missing_debug_implementations, missing_docs, unreachable_pub, rustdoc::all)] +#![deny(unused_must_use, rust_2018_idioms)] +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] + +use std::{ + cmp::Reverse, + collections::BinaryHeap, + io::{BufReader, BufWriter, Read, Seek, SeekFrom, Write}, + path::Path, + sync::Arc, +}; + +use rayon::prelude::*; +use reth_db::table::{Compress, Encode, Key, Value}; +use tempfile::{NamedTempFile, TempDir}; + +/// An ETL (extract, transform, load) data collector. +/// +/// Data is pushed (extract) to the collector which internally flushes the data in a sorted +/// (transform) manner to files of some specified capacity. +/// +/// The data can later be iterated over (load) in a sorted manner. +#[derive(Debug)] +pub struct Collector +where + K: Encode + Ord, + V: Compress, + ::Encoded: std::fmt::Debug, + ::Compressed: std::fmt::Debug, +{ + /// Directory for temporary file storage + dir: Arc, + /// Collection of temporary ETL files + files: Vec, + /// Current buffer size in bytes + buffer_size_bytes: usize, + /// Maximum buffer capacity in bytes, triggers flush when reached + buffer_capacity_bytes: usize, + /// In-memory buffer storing encoded and compressed key-value pairs + buffer: Vec<(::Encoded, ::Compressed)>, + /// Total number of elements in the collector, including all files + len: usize, +} + +impl Collector +where + K: Key, + V: Value, + ::Encoded: Ord + std::fmt::Debug, + ::Compressed: Ord + std::fmt::Debug, +{ + /// Create a new collector in a specific temporary directory with some capacity. + /// + /// Once the capacity (in bytes) is reached, the data is sorted and flushed to disk. + pub fn new(dir: Arc, buffer_capacity_bytes: usize) -> Self { + Self { + dir, + buffer_size_bytes: 0, + files: Vec::new(), + buffer_capacity_bytes, + buffer: Vec::new(), + len: 0, + } + } + + /// Returns number of elements currently in the collector. + pub fn len(&self) -> usize { + self.len + } + + /// Returns `true` if there are currently no elements in the collector. + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Insert an entry into the collector. 
+ pub fn insert(&mut self, key: K, value: V) { + let key = key.encode(); + let value = value.compress(); + self.buffer_size_bytes += key.as_ref().len() + value.as_ref().len(); + self.buffer.push((key, value)); + if self.buffer_size_bytes > self.buffer_capacity_bytes { + self.flush(); + } + self.len += 1; + } + + fn flush(&mut self) { + self.buffer_size_bytes = 0; + self.buffer.par_sort_unstable_by(|a, b| a.0.cmp(&b.0)); + let mut buf = Vec::with_capacity(self.buffer.len()); + std::mem::swap(&mut buf, &mut self.buffer); + self.files.push(EtlFile::new(self.dir.path(), buf).expect("could not flush data to disk")) + } + + /// Returns an iterator over the collector data. + /// + /// The items of the iterator are sorted across all underlying files. + /// + /// # Note + /// + /// The keys and values have been pre-encoded, meaning they *SHOULD NOT* be encoded or + /// compressed again. + pub fn iter(&mut self) -> std::io::Result> { + // Flush the remaining items to disk + if self.buffer_size_bytes > 0 { + self.flush(); + } + + let mut heap = BinaryHeap::new(); + for (current_id, file) in self.files.iter_mut().enumerate() { + if let Some((current_key, current_value)) = file.read_next()? { + heap.push((Reverse((current_key, current_value)), current_id)); + } + } + + Ok(EtlIter { heap, files: &mut self.files }) + } +} + +/// `EtlIter` is an iterator for traversing through sorted key-value pairs in a collection of ETL +/// files. These files are created using the [`Collector`] and contain data where keys are encoded +/// and values are compressed. +/// +/// This iterator returns each key-value pair in ascending order based on the key. +/// It is particularly designed to efficiently handle large datasets by employing a binary heap for +/// managing the iteration order. +#[derive(Debug)] +pub struct EtlIter<'a> { + /// Heap managing the next items to be iterated. + #[allow(clippy::type_complexity)] + heap: BinaryHeap<(Reverse<(Vec, Vec)>, usize)>, + /// Reference to the vector of ETL files being iterated over. + files: &'a mut Vec, +} + +impl<'a> EtlIter<'a> { + /// Peeks into the next element + pub fn peek(&self) -> Option<&(Vec, Vec)> { + self.heap.peek().map(|(Reverse(entry), _)| entry) + } +} + +impl<'a> Iterator for EtlIter<'a> { + type Item = std::io::Result<(Vec, Vec)>; + + fn next(&mut self) -> Option { + // Get the next sorted entry from the heap + let (Reverse(entry), id) = self.heap.pop()?; + + // Populate the heap with the next entry from the same file + match self.files[id].read_next() { + Ok(Some((key, value))) => { + self.heap.push((Reverse((key, value)), id)); + Some(Ok(entry)) + } + Ok(None) => Some(Ok(entry)), + err => err.transpose(), + } + } +} + +/// A temporary ETL file. +#[derive(Debug)] +struct EtlFile { + file: BufReader, + len: usize, +} + +impl EtlFile { + /// Create a new file with the given data (which should be pre-sorted) at the given path. + /// + /// The file will be a temporary file. 
+ pub(crate) fn new(dir: &Path, buffer: Vec<(K, V)>) -> std::io::Result + where + Self: Sized, + K: AsRef<[u8]>, + V: AsRef<[u8]>, + { + let file = NamedTempFile::new_in(dir)?; + let mut w = BufWriter::new(file); + for entry in &buffer { + let k = entry.0.as_ref(); + let v = entry.1.as_ref(); + + w.write_all(&k.len().to_be_bytes())?; + w.write_all(&v.len().to_be_bytes())?; + w.write_all(k)?; + w.write_all(v)?; + } + + let mut file = BufReader::new(w.into_inner()?); + file.seek(SeekFrom::Start(0))?; + let len = buffer.len(); + Ok(Self { file, len }) + } + + /// Read the next entry in the file. + /// + /// Can return error if it reaches EOF before filling the internal buffers. + pub(crate) fn read_next(&mut self) -> std::io::Result, Vec)>> { + if self.len == 0 { + return Ok(None) + } + + let mut buffer_key_length = [0; 8]; + let mut buffer_value_length = [0; 8]; + + self.file.read_exact(&mut buffer_key_length)?; + self.file.read_exact(&mut buffer_value_length)?; + + let key_length = usize::from_be_bytes(buffer_key_length); + let value_length = usize::from_be_bytes(buffer_value_length); + let mut key = vec![0; key_length]; + let mut value = vec![0; value_length]; + + self.file.read_exact(&mut key)?; + self.file.read_exact(&mut value)?; + + self.len -= 1; + + Ok(Some((key, value))) + } +} + +#[cfg(test)] +mod tests { + use reth_primitives::{TxHash, TxNumber}; + + use super::*; + + #[test] + fn etl_hashes() { + let mut entries: Vec<_> = + (0..10_000).map(|id| (TxHash::random(), id as TxNumber)).collect(); + + let mut collector = Collector::new(Arc::new(TempDir::new().unwrap()), 1024); + for (k, v) in entries.clone() { + collector.insert(k, v); + } + entries.sort_unstable_by_key(|entry| entry.0); + + for (id, entry) in collector.iter().unwrap().enumerate() { + let expected = entries[id]; + assert_eq!( + entry.unwrap(), + (expected.0.encode().to_vec(), expected.1.compress().to_vec()) + ); + } + } +} diff --git a/crates/interfaces/src/provider.rs b/crates/interfaces/src/provider.rs index 5e8a6a02431f..513e3efe9d91 100644 --- a/crates/interfaces/src/provider.rs +++ b/crates/interfaces/src/provider.rs @@ -1,5 +1,5 @@ use reth_primitives::{ - Address, BlockHash, BlockHashOrNumber, BlockNumber, GotExpected, SnapshotSegment, + Address, BlockHash, BlockHashOrNumber, BlockNumber, GotExpected, StaticFileSegment, TxHashOrNumber, TxNumber, B256, U256, }; use std::path::PathBuf; @@ -113,15 +113,18 @@ pub enum ProviderError { /// Provider does not support this particular request. #[error("this provider does not support this request")] UnsupportedProvider, - /// Snapshot file is not found at specified path. - #[error("not able to find {0} snapshot file at {1}")] - MissingSnapshotPath(SnapshotSegment, PathBuf), - /// Snapshot file is not found for requested block. - #[error("not able to find {0} snapshot file for block number {1}")] - MissingSnapshotBlock(SnapshotSegment, BlockNumber), - /// Snapshot file is not found for requested transaction. - #[error("not able to find {0} snapshot file for transaction id {1}")] - MissingSnapshotTx(SnapshotSegment, TxNumber), + /// Static File is not found at specified path. + #[error("not able to find {0} static file at {1}")] + MissingStaticFilePath(StaticFileSegment, PathBuf), + /// Static File is not found for requested block. + #[error("not able to find {0} static file for block number {1}")] + MissingStaticFileBlock(StaticFileSegment, BlockNumber), + /// Static File is not found for requested transaction. 
+ #[error("unable to find {0} static file for transaction id {1}")] + MissingStaticFileTx(StaticFileSegment, TxNumber), + /// Static File is finalized and cannot be written to. + #[error("unable to write block #{1} to finalized static file {0}")] + FinalizedStaticFile(StaticFileSegment, BlockNumber), /// Error encountered when the block number conversion from U256 to u64 causes an overflow. #[error("failed to convert block number U256 to u64: {0}")] BlockNumberOverflow(U256), diff --git a/crates/interfaces/src/test_utils/generators.rs b/crates/interfaces/src/test_utils/generators.rs index eb6e72cccc4b..826a737d6684 100644 --- a/crates/interfaces/src/test_utils/generators.rs +++ b/crates/interfaces/src/test_utils/generators.rs @@ -80,7 +80,7 @@ pub fn random_tx(rng: &mut R) -> Transaction { gas_price: rng.gen::().into(), gas_limit: rng.gen::().into(), to: TransactionKind::Call(rng.gen()), - value: U256::from(rng.gen::()).into(), + value: U256::from(rng.gen::()), input: Bytes::default(), }) } @@ -324,12 +324,9 @@ pub fn random_eoa_account(rng: &mut R) -> (Address, Account) { } /// Generate random Externally Owned Accounts -pub fn random_eoa_account_range( - rng: &mut R, - acc_range: Range, -) -> Vec<(Address, Account)> { - let mut accounts = Vec::with_capacity(acc_range.end.saturating_sub(acc_range.start) as usize); - for _ in acc_range { +pub fn random_eoa_accounts(rng: &mut R, accounts_num: usize) -> Vec<(Address, Account)> { + let mut accounts = Vec::with_capacity(accounts_num); + for _ in 0..accounts_num { accounts.push(random_eoa_account(rng)) } accounts @@ -399,7 +396,7 @@ mod tests { nonce: 0x42, gas_limit: 44386, to: TransactionKind::Call(hex!("6069a6c32cf691f5982febae4faf8a6f3ab2f0f6").into()), - value: 0_u64.into(), + value: U256::from(0_u64), input: hex!("a22cb4650000000000000000000000005eee75727d804a2b13038928d36f8b188945a57a0000000000000000000000000000000000000000000000000000000000000000").into(), max_fee_per_gas: 0x4a817c800, max_priority_fee_per_gas: 0x3b9aca00, @@ -431,7 +428,7 @@ mod tests { gas_price: 20 * 10_u128.pow(9), gas_limit: 21000, to: TransactionKind::Call(hex!("3535353535353535353535353535353535353535").into()), - value: 10_u128.pow(18).into(), + value: U256::from(10_u128.pow(18)), input: Bytes::default(), }); diff --git a/crates/net/downloaders/src/bodies/bodies.rs b/crates/net/downloaders/src/bodies/bodies.rs index 627b5ea9d246..2d436acf798e 100644 --- a/crates/net/downloaders/src/bodies/bodies.rs +++ b/crates/net/downloaders/src/bodies/bodies.rs @@ -596,7 +596,7 @@ mod tests { test_utils::{generate_bodies, TestBodiesClient}, }; use assert_matches::assert_matches; - use reth_db::test_utils::create_test_rw_db; + use reth_db::test_utils::{create_test_rw_db, create_test_static_files_dir}; use reth_interfaces::test_utils::{generators, generators::random_block_range, TestConsensus}; use reth_primitives::{BlockBody, B256, MAINNET}; use reth_provider::ProviderFactory; @@ -618,7 +618,7 @@ mod tests { let mut downloader = BodiesDownloaderBuilder::default().build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + ProviderFactory::new(db, MAINNET.clone(), create_test_static_files_dir()).unwrap(), ); downloader.set_download_range(0..=19).expect("failed to set download range"); @@ -657,7 +657,7 @@ mod tests { BodiesDownloaderBuilder::default().with_request_limit(request_limit).build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + ProviderFactory::new(db, MAINNET.clone(), 
create_test_static_files_dir()).unwrap(), ); downloader.set_download_range(0..=199).expect("failed to set download range"); @@ -686,7 +686,7 @@ mod tests { .build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + ProviderFactory::new(db, MAINNET.clone(), create_test_static_files_dir()).unwrap(), ); let mut range_start = 0; @@ -716,7 +716,7 @@ mod tests { let mut downloader = BodiesDownloaderBuilder::default().with_stream_batch_size(100).build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + ProviderFactory::new(db, MAINNET.clone(), create_test_static_files_dir()).unwrap(), ); // Set and download the first range @@ -756,7 +756,7 @@ mod tests { .build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + ProviderFactory::new(db, MAINNET.clone(), create_test_static_files_dir()).unwrap(), ); // Set and download the entire range @@ -787,7 +787,7 @@ mod tests { .build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + ProviderFactory::new(db, MAINNET.clone(), create_test_static_files_dir()).unwrap(), ); // Download the requested range diff --git a/crates/net/downloaders/src/bodies/task.rs b/crates/net/downloaders/src/bodies/task.rs index cbae7628f1ad..2424101db529 100644 --- a/crates/net/downloaders/src/bodies/task.rs +++ b/crates/net/downloaders/src/bodies/task.rs @@ -169,20 +169,18 @@ mod tests { test_utils::{generate_bodies, TestBodiesClient}, }; use assert_matches::assert_matches; - use reth_db::test_utils::create_test_rw_db; use reth_interfaces::{p2p::error::DownloadError, test_utils::TestConsensus}; - use reth_primitives::MAINNET; - use reth_provider::ProviderFactory; + use reth_provider::test_utils::create_test_provider_factory; use std::sync::Arc; #[tokio::test(flavor = "multi_thread")] async fn download_one_by_one_on_task() { reth_tracing::init_test_tracing(); - let db = create_test_rw_db(); + let factory = create_test_provider_factory(); let (headers, mut bodies) = generate_bodies(0..=19); - insert_headers(db.db(), &headers); + insert_headers(factory.db_ref().db(), &headers); let client = Arc::new( TestBodiesClient::default().with_bodies(bodies.clone()).with_should_delay(true), @@ -190,7 +188,7 @@ mod tests { let downloader = BodiesDownloaderBuilder::default().build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + factory, ); let mut downloader = TaskDownloader::spawn(downloader); @@ -208,11 +206,10 @@ mod tests { async fn set_download_range_error_returned() { reth_tracing::init_test_tracing(); - let db = create_test_rw_db(); let downloader = BodiesDownloaderBuilder::default().build( Arc::new(TestBodiesClient::default()), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + create_test_provider_factory(), ); let mut downloader = TaskDownloader::spawn(downloader); diff --git a/crates/net/downloaders/src/file_client.rs b/crates/net/downloaders/src/file_client.rs index f3504a1b1547..ebc5fe40895a 100644 --- a/crates/net/downloaders/src/file_client.rs +++ b/crates/net/downloaders/src/file_client.rs @@ -241,7 +241,6 @@ mod tests { }; use assert_matches::assert_matches; use futures_util::stream::StreamExt; - use reth_db::test_utils::create_test_rw_db; use reth_interfaces::{ p2p::{ bodies::downloader::BodyDownloader, @@ -249,17 +248,17 @@ mod tests { }, test_utils::TestConsensus, }; - use reth_primitives::{SealedHeader, 
MAINNET}; - use reth_provider::ProviderFactory; + use reth_primitives::SealedHeader; + use reth_provider::test_utils::create_test_provider_factory; use std::sync::Arc; #[tokio::test] async fn streams_bodies_from_buffer() { // Generate some random blocks - let db = create_test_rw_db(); + let factory = create_test_provider_factory(); let (headers, mut bodies) = generate_bodies(0..=19); - insert_headers(db.db(), &headers); + insert_headers(factory.db_ref().db(), &headers); // create an empty file let file = tempfile::tempfile().unwrap(); @@ -269,7 +268,7 @@ mod tests { let mut downloader = BodiesDownloaderBuilder::default().build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + factory, ); downloader.set_download_range(0..=19).expect("failed to set download range"); @@ -337,19 +336,19 @@ mod tests { #[tokio::test] async fn test_download_bodies_from_file() { // Generate some random blocks - let db = create_test_rw_db(); + let factory = create_test_provider_factory(); let (file, headers, mut bodies) = generate_bodies_file(0..=19).await; // now try to read them back let client = Arc::new(FileClient::from_file(file).await.unwrap()); // insert headers in db for the bodies downloader - insert_headers(db.db(), &headers); + insert_headers(factory.db_ref().db(), &headers); let mut downloader = BodiesDownloaderBuilder::default().build( client.clone(), Arc::new(TestConsensus::default()), - ProviderFactory::new(db, MAINNET.clone()), + factory, ); downloader.set_download_range(0..=19).expect("failed to set download range"); diff --git a/crates/net/eth-wire/Cargo.toml b/crates/net/eth-wire/Cargo.toml index 7bf89f3b2c38..bab06af474d2 100644 --- a/crates/net/eth-wire/Cargo.toml +++ b/crates/net/eth-wire/Cargo.toml @@ -25,7 +25,7 @@ reth-metrics.workspace = true metrics.workspace = true bytes.workspace = true -derive_more = "0.99.17" +derive_more.workspace = true thiserror.workspace = true serde = { workspace = true, optional = true } tokio = { workspace = true, features = ["full"] } diff --git a/crates/net/eth-wire/src/types/blocks.rs b/crates/net/eth-wire/src/types/blocks.rs index b27c028cf581..9491e34f0948 100644 --- a/crates/net/eth-wire/src/types/blocks.rs +++ b/crates/net/eth-wire/src/types/blocks.rs @@ -386,7 +386,7 @@ mod tests { gas_price: 0x4a817c808, gas_limit: 0x2e248u64, to: TransactionKind::Call(hex!("3535353535353535353535353535353535353535").into()), - value: 0x200u64.into(), + value: U256::from(0x200u64), input: Default::default(), }), Signature { @@ -401,7 +401,7 @@ mod tests { gas_price: 0x4a817c809, gas_limit: 0x33450u64, to: TransactionKind::Call(hex!("3535353535353535353535353535353535353535").into()), - value: 0x2d9u64.into(), + value: U256::from(0x2d9u64), input: Default::default(), }), Signature { odd_y_parity: false, @@ -458,7 +458,7 @@ mod tests { gas_price: 0x4a817c808, gas_limit: 0x2e248u64, to: TransactionKind::Call(hex!("3535353535353535353535353535353535353535").into()), - value: 0x200u64.into(), + value: U256::from(0x200u64), input: Default::default(), }), Signature { @@ -474,7 +474,7 @@ mod tests { gas_price: 0x4a817c809, gas_limit: 0x33450u64, to: TransactionKind::Call(hex!("3535353535353535353535353535353535353535").into()), - value: 0x2d9u64.into(), + value: U256::from(0x2d9u64), input: Default::default(), }), Signature { diff --git a/crates/net/eth-wire/src/types/transactions.rs b/crates/net/eth-wire/src/types/transactions.rs index 6288be35cda4..dd593c5c7bb6 100644 --- a/crates/net/eth-wire/src/types/transactions.rs +++ 
b/crates/net/eth-wire/src/types/transactions.rs @@ -130,7 +130,7 @@ mod tests { to: TransactionKind::Call( hex!("3535353535353535353535353535353535353535").into(), ), - value: 0x200u64.into(), + value: U256::from(0x200u64), input: Default::default(), }), Signature { @@ -154,7 +154,7 @@ mod tests { to: TransactionKind::Call( hex!("3535353535353535353535353535353535353535").into(), ), - value: 0x2d9u64.into(), + value: U256::from(0x2d9u64), input: Default::default(), }), Signature { @@ -192,7 +192,7 @@ mod tests { to: TransactionKind::Call( hex!("3535353535353535353535353535353535353535").into(), ), - value: 0x200u64.into(), + value: U256::from(0x200u64), input: Default::default(), }), Signature { @@ -216,7 +216,7 @@ mod tests { to: TransactionKind::Call( hex!("3535353535353535353535353535353535353535").into(), ), - value: 0x2d9u64.into(), + value: U256::from(0x2d9u64), input: Default::default(), }), Signature { @@ -257,7 +257,7 @@ mod tests { to: TransactionKind::Call( hex!("cf7f9e66af820a19257a2108375b180b0ec49167").into(), ), - value: 1234u64.into(), + value: U256::from(1234u64), input: Default::default(), }), Signature { @@ -282,7 +282,7 @@ mod tests { to: TransactionKind::Call( hex!("61815774383099e24810ab832a5b2a5425c154d5").into(), ), - value: 3000000000000000000u64.into(), + value: U256::from(3000000000000000000u64), input: Default::default(), access_list: Default::default(), }), @@ -307,7 +307,7 @@ mod tests { to: TransactionKind::Call( hex!("d3e8763675e4c425df46cc3b5c0f6cbdac396046").into(), ), - value: 1000000000000000u64.into(), + value: U256::from(1000000000000000u64), input: Default::default(), }), Signature { @@ -331,7 +331,7 @@ mod tests { to: TransactionKind::Call( hex!("d3e8763675e4c425df46cc3b5c0f6cbdac396046").into(), ), - value: 693361000000000u64.into(), + value: U256::from(693361000000000u64), input: Default::default(), }), Signature { @@ -355,7 +355,7 @@ mod tests { to: TransactionKind::Call( hex!("d3e8763675e4c425df46cc3b5c0f6cbdac396046").into(), ), - value: 1000000000000000u64.into(), + value: U256::from(1000000000000000u64), input: Default::default(), }), Signature { @@ -400,7 +400,7 @@ mod tests { to: TransactionKind::Call( hex!("cf7f9e66af820a19257a2108375b180b0ec49167").into(), ), - value: 1234u64.into(), + value: U256::from(1234u64), input: Default::default(), }), Signature { @@ -425,7 +425,7 @@ mod tests { to: TransactionKind::Call( hex!("61815774383099e24810ab832a5b2a5425c154d5").into(), ), - value: 3000000000000000000u64.into(), + value: U256::from(3000000000000000000u64), input: Default::default(), access_list: Default::default(), }), @@ -450,7 +450,7 @@ mod tests { to: TransactionKind::Call( hex!("d3e8763675e4c425df46cc3b5c0f6cbdac396046").into(), ), - value: 1000000000000000u64.into(), + value: U256::from(1000000000000000u64), input: Default::default(), }), Signature { @@ -474,7 +474,7 @@ mod tests { to: TransactionKind::Call( hex!("d3e8763675e4c425df46cc3b5c0f6cbdac396046").into(), ), - value: 693361000000000u64.into(), + value: U256::from(693361000000000u64), input: Default::default(), }), Signature { @@ -498,7 +498,7 @@ mod tests { to: TransactionKind::Call( hex!("d3e8763675e4c425df46cc3b5c0f6cbdac396046").into(), ), - value: 1000000000000000u64.into(), + value: U256::from(1000000000000000u64), input: Default::default(), }), Signature { diff --git a/crates/net/network/tests/it/requests.rs b/crates/net/network/tests/it/requests.rs index 535cf4167530..decc9ee2507e 100644 --- a/crates/net/network/tests/it/requests.rs +++ 
b/crates/net/network/tests/it/requests.rs @@ -26,7 +26,7 @@ pub fn rng_transaction(rng: &mut impl rand::RngCore) -> TransactionSigned { gas_price: rng.gen(), gas_limit: rng.gen(), to: TransactionKind::Create, - value: rng.gen::().into(), + value: U256::from(rng.gen::()), input: Bytes::from(vec![1, 2]), access_list: Default::default(), }); diff --git a/crates/node-builder/Cargo.toml b/crates/node-builder/Cargo.toml index c9e122e2c41e..a7375ba39348 100644 --- a/crates/node-builder/Cargo.toml +++ b/crates/node-builder/Cargo.toml @@ -29,7 +29,7 @@ reth-transaction-pool.workspace = true reth-tasks.workspace = true reth-tracing.workspace = true reth-interfaces.workspace = true -reth-snapshot.workspace = true +reth-static-file.workspace = true reth-prune.workspace = true reth-stages.workspace = true reth-config.workspace = true diff --git a/crates/node-builder/src/builder.rs b/crates/node-builder/src/builder.rs index d7ce7fddf44a..5ed549afd508 100644 --- a/crates/node-builder/src/builder.rs +++ b/crates/node-builder/src/builder.rs @@ -350,22 +350,22 @@ where info!(target: "reth::cli", "Database opened"); - let mut provider_factory = - ProviderFactory::new(database.clone(), Arc::clone(&config.chain)); - - // configure snapshotter - let snapshotter = reth_snapshot::Snapshotter::new( - provider_factory.clone(), - data_dir.snapshots_path(), - config.chain.snapshot_block_interval, + let provider_factory = ProviderFactory::new( + database.clone(), + Arc::clone(&config.chain), + data_dir.static_files_path(), )?; - provider_factory = provider_factory - .with_snapshots(data_dir.snapshots_path(), snapshotter.highest_snapshot_receiver())?; + // configure static_file_producer + let static_file_producer = reth_static_file::StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + config.prune_config()?.unwrap_or_default().segments, + ); debug!(target: "reth::cli", chain=%config.chain.chain, genesis=?config.chain.genesis_hash(), "Initializing genesis"); - let genesis_hash = init_genesis(database.clone(), config.chain.clone())?; + let genesis_hash = init_genesis(provider_factory.clone())?; info!(target: "reth::cli", "{}", config.chain.display_hardforks()); @@ -471,6 +471,7 @@ where sync_metrics_tx, prune_config.clone(), max_block, + static_file_producer, evm_config, ) .await?; @@ -492,6 +493,7 @@ where sync_metrics_tx, prune_config.clone(), max_block, + static_file_producer, evm_config, ) .await?; @@ -508,7 +510,7 @@ where let mut pruner = PrunerBuilder::new(prune_config.clone()) .max_reorg_depth(tree_config.max_reorg_depth() as usize) .prune_delete_limit(config.chain.prune_delete_limit) - .build(provider_factory, snapshotter.highest_snapshot_receiver()); + .build(provider_factory); let events = pruner.events(); hooks.add(PruneHook::new(pruner, Box::new(executor.clone()))); diff --git a/crates/node-core/Cargo.toml b/crates/node-core/Cargo.toml index f252e0483100..308ae00b15bc 100644 --- a/crates/node-core/Cargo.toml +++ b/crates/node-core/Cargo.toml @@ -44,7 +44,7 @@ reth-stages.workspace = true reth-prune.workspace = true reth-blockchain-tree.workspace = true revm-inspectors.workspace = true -reth-snapshot.workspace = true +reth-static-file.workspace = true reth-eth-wire.workspace = true # `optimism` feature @@ -69,6 +69,7 @@ thiserror.workspace = true const-str = "0.5.6" rand.workspace = true pin-project.workspace = true +derive_more.workspace = true # io dirs-next = "2.0.0" diff --git a/crates/node-core/src/args/stage_args.rs b/crates/node-core/src/args/stage_args.rs 
index 46618ff2f726..d90eabcfc1d0 100644 --- a/crates/node-core/src/args/stage_args.rs +++ b/crates/node-core/src/args/stage_args.rs @@ -1,9 +1,10 @@ //! Shared arguments related to stages +use derive_more::Display; /// Represents a specific stage within the data pipeline. /// /// Different stages within the pipeline have dedicated functionalities and operations. -#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, clap::ValueEnum)] +#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, clap::ValueEnum, Display)] pub enum StageEnum { /// The headers stage within the pipeline. /// @@ -49,8 +50,4 @@ pub enum StageEnum { /// /// Manages historical data related to storage. StorageHistory, - /// The total difficulty stage within the pipeline. - /// - /// Handles computations and data related to total difficulty. - TotalDifficulty, } diff --git a/crates/node-core/src/dirs.rs b/crates/node-core/src/dirs.rs index b4e6d8955bb6..affcb7f8a6d3 100644 --- a/crates/node-core/src/dirs.rs +++ b/crates/node-core/src/dirs.rs @@ -282,9 +282,9 @@ impl ChainPath { self.0.join("db").into() } - /// Returns the path to the snapshots directory for this chain. - pub fn snapshots_path(&self) -> PathBuf { - self.0.join("snapshots").into() + /// Returns the path to the static_files directory for this chain. + pub fn static_files_path(&self) -> PathBuf { + self.0.join("static_files").into() } /// Returns the path to the reth p2p secret key for this chain. diff --git a/crates/node-core/src/events/node.rs b/crates/node-core/src/events/node.rs index 00b5b7fb54d2..0de1987a7c6e 100644 --- a/crates/node-core/src/events/node.rs +++ b/crates/node-core/src/events/node.rs @@ -14,6 +14,7 @@ use reth_primitives::{ }; use reth_prune::PrunerEvent; use reth_stages::{ExecOutput, PipelineEvent}; +use reth_static_file::StaticFileProducerEvent; use std::{ fmt::{Display, Formatter}, future::Future, @@ -233,11 +234,25 @@ impl NodeState { fn handle_pruner_event(&self, event: PrunerEvent) { match event { + PrunerEvent::Started { tip_block_number } => { + info!(tip_block_number, "Pruner started"); + } PrunerEvent::Finished { tip_block_number, elapsed, stats } => { info!(tip_block_number, ?elapsed, ?stats, "Pruner finished"); } } } + + fn handle_static_file_producer_event(&self, event: StaticFileProducerEvent) { + match event { + StaticFileProducerEvent::Started { targets } => { + info!(?targets, "Static File Producer started"); + } + StaticFileProducerEvent::Finished { targets, elapsed } => { + info!(?targets, ?elapsed, "Static File Producer finished"); + } + } + } } impl NodeState { @@ -282,6 +297,8 @@ pub enum NodeEvent { ConsensusLayerHealth(ConsensusLayerHealthEvent), /// A pruner event Pruner(PrunerEvent), + /// A static_file_producer event + StaticFileProducer(StaticFileProducerEvent), } impl From for NodeEvent { @@ -314,6 +331,12 @@ impl From for NodeEvent { } } +impl From for NodeEvent { + fn from(event: StaticFileProducerEvent) -> Self { + NodeEvent::StaticFileProducer(event) + } +} + /// Displays relevant information to the user from components of the node, and periodically /// displays the high-level status of the node. 
pub async fn handle_events( @@ -430,6 +453,9 @@ where NodeEvent::Pruner(event) => { this.state.handle_pruner_event(event); } + NodeEvent::StaticFileProducer(event) => { + this.state.handle_static_file_producer_event(event); + } } } diff --git a/crates/node-core/src/init.rs b/crates/node-core/src/init.rs index dd0eca9f3522..8b7a0c08f2e9 100644 --- a/crates/node-core/src/init.rs +++ b/crates/node-core/src/init.rs @@ -1,19 +1,20 @@ //! Reth genesis initialization utility functions. use reth_db::{ - cursor::DbCursorRO, database::Database, tables, transaction::{DbTx, DbTxMut}, }; use reth_interfaces::{db::DatabaseError, provider::ProviderResult}; use reth_primitives::{ - stage::StageId, Account, Bytecode, ChainSpec, Receipts, StorageEntry, B256, U256, + stage::StageId, Account, Bytecode, ChainSpec, Receipts, StaticFileSegment, StorageEntry, B256, + U256, }; use reth_provider::{ bundle_state::{BundleStateInit, RevertsInit}, - BundleStateWithReceipts, DatabaseProviderRW, HashingWriter, HistoryWriter, OriginalValuesKnown, - ProviderError, ProviderFactory, + providers::{StaticFileProvider, StaticFileWriter}, + BlockHashReader, BundleStateWithReceipts, ChainSpecProvider, DatabaseProviderRW, HashingWriter, + HistoryWriter, OriginalValuesKnown, ProviderError, ProviderFactory, }; use std::{ collections::{BTreeMap, HashMap}, @@ -46,49 +47,49 @@ impl From for InitDatabaseError { } /// Write the genesis block if it has not already been written -pub fn init_genesis( - db: DB, - chain: Arc, -) -> Result { - let genesis = chain.genesis(); +pub fn init_genesis(factory: ProviderFactory) -> Result { + let chain = factory.chain_spec(); + let genesis = chain.genesis(); let hash = chain.genesis_hash(); - let tx = db.tx()?; - if let Some((_, db_hash)) = tx.cursor_read::()?.first()? { - if db_hash == hash { - debug!("Genesis already written, skipping."); - return Ok(hash) - } + // Check if we already have the genesis header or if we have the wrong one. + match factory.block_hash(0) { + Ok(None) | Err(ProviderError::MissingStaticFileBlock(StaticFileSegment::Headers, 0)) => {} + Ok(Some(block_hash)) => { + if block_hash == hash { + debug!("Genesis already written, skipping."); + return Ok(hash) + } - return Err(InitDatabaseError::GenesisHashMismatch { - chainspec_hash: hash, - database_hash: db_hash, - }) + return Err(InitDatabaseError::GenesisHashMismatch { + chainspec_hash: hash, + database_hash: block_hash, + }) + } + Err(e) => return Err(dbg!(e).into()), } - drop(tx); debug!("Writing genesis block."); // use transaction to insert genesis header - let factory = ProviderFactory::new(&db, chain.clone()); let provider_rw = factory.provider_rw()?; insert_genesis_hashes(&provider_rw, genesis)?; insert_genesis_history(&provider_rw, genesis)?; - provider_rw.commit()?; // Insert header - let tx = db.tx_mut()?; - insert_genesis_header::(&tx, chain.clone())?; + let tx = provider_rw.into_tx(); + insert_genesis_header::(&tx, factory.static_file_provider(), chain.clone())?; insert_genesis_state::(&tx, genesis)?; // insert sync stage for stage in StageId::ALL.iter() { - tx.put::(stage.to_string(), Default::default())?; + tx.put::(stage.to_string(), Default::default())?; } tx.commit()?; + Ok(hash) } @@ -153,14 +154,14 @@ pub fn insert_genesis_state( 0, ); - bundle.write_to_db(tx, OriginalValuesKnown::Yes)?; + bundle.write_to_storage(tx, None, OriginalValuesKnown::Yes)?; Ok(()) } /// Inserts hashes for the genesis state. 
pub fn insert_genesis_hashes( - provider: &DatabaseProviderRW<&DB>, + provider: &DatabaseProviderRW, genesis: &reth_primitives::Genesis, ) -> ProviderResult<()> { // insert and hash accounts to hashing table @@ -187,7 +188,7 @@ pub fn insert_genesis_hashes( /// Inserts history indices for genesis accounts and storage. pub fn insert_genesis_history( - provider: &DatabaseProviderRW<&DB>, + provider: &DatabaseProviderRW, genesis: &reth_primitives::Genesis, ) -> ProviderResult<()> { let account_transitions = @@ -208,15 +209,24 @@ pub fn insert_genesis_history( /// Inserts header for the genesis state. pub fn insert_genesis_header( tx: &::TXMut, + static_file_provider: StaticFileProvider, chain: Arc, ) -> ProviderResult<()> { let (header, block_hash) = chain.sealed_genesis_header().split(); - tx.put::(0, block_hash)?; + match static_file_provider.block_hash(0) { + Ok(None) | Err(ProviderError::MissingStaticFileBlock(StaticFileSegment::Headers, 0)) => { + let (difficulty, hash) = (header.difficulty, block_hash); + let mut writer = static_file_provider.latest_writer(StaticFileSegment::Headers)?; + writer.append_header(header, difficulty, hash)?; + writer.commit()?; + } + Ok(Some(_)) => {} + Err(e) => return Err(e), + } + tx.put::(block_hash, 0)?; tx.put::(0, Default::default())?; - tx.put::(0, header.difficulty.into())?; - tx.put::(0, header)?; Ok(()) } @@ -226,15 +236,16 @@ mod tests { use super::*; use reth_db::{ + cursor::DbCursorRO, models::{storage_sharded_key::StorageShardedKey, ShardedKey}, table::{Table, TableRow}, - test_utils::create_test_rw_db, DatabaseEnv, }; use reth_primitives::{ Address, Chain, ForkTimestamps, Genesis, GenesisAccount, IntegerList, GOERLI, GOERLI_GENESIS_HASH, MAINNET, MAINNET_GENESIS_HASH, SEPOLIA, SEPOLIA_GENESIS_HASH, }; + use reth_provider::test_utils::create_test_provider_factory_with_chain_spec; fn collect_table_entries( tx: &::TX, @@ -248,8 +259,8 @@ mod tests { #[test] fn success_init_genesis_mainnet() { - let db = create_test_rw_db(); - let genesis_hash = init_genesis(db, MAINNET.clone()).unwrap(); + let genesis_hash = + init_genesis(create_test_provider_factory_with_chain_spec(MAINNET.clone())).unwrap(); // actual, expected assert_eq!(genesis_hash, MAINNET_GENESIS_HASH); @@ -257,8 +268,8 @@ mod tests { #[test] fn success_init_genesis_goerli() { - let db = create_test_rw_db(); - let genesis_hash = init_genesis(db, GOERLI.clone()).unwrap(); + let genesis_hash = + init_genesis(create_test_provider_factory_with_chain_spec(GOERLI.clone())).unwrap(); // actual, expected assert_eq!(genesis_hash, GOERLI_GENESIS_HASH); @@ -266,8 +277,8 @@ mod tests { #[test] fn success_init_genesis_sepolia() { - let db = create_test_rw_db(); - let genesis_hash = init_genesis(db, SEPOLIA.clone()).unwrap(); + let genesis_hash = + init_genesis(create_test_provider_factory_with_chain_spec(SEPOLIA.clone())).unwrap(); // actual, expected assert_eq!(genesis_hash, SEPOLIA_GENESIS_HASH); @@ -275,11 +286,19 @@ mod tests { #[test] fn fail_init_inconsistent_db() { - let db = create_test_rw_db(); - init_genesis(db.clone(), SEPOLIA.clone()).unwrap(); + let factory = create_test_provider_factory_with_chain_spec(SEPOLIA.clone()); + let static_file_provider = factory.static_file_provider(); + init_genesis(factory.clone()).unwrap(); // Try to init db with a different genesis block - let genesis_hash = init_genesis(db, MAINNET.clone()); + let genesis_hash = init_genesis( + ProviderFactory::new( + factory.into_db(), + MAINNET.clone(), + static_file_provider.path().into(), + ) + .unwrap(), + ); 
assert_eq!( genesis_hash.unwrap_err(), @@ -321,13 +340,15 @@ mod tests { ..Default::default() }); - let db = create_test_rw_db(); - init_genesis(db.clone(), chain_spec).unwrap(); + let factory = create_test_provider_factory_with_chain_spec(chain_spec); + init_genesis(factory.clone()).unwrap(); + + let provider = factory.provider().unwrap(); - let tx = db.tx().expect("failed to init tx"); + let tx = provider.tx_ref(); assert_eq!( - collect_table_entries::, tables::AccountHistory>(&tx) + collect_table_entries::, tables::AccountsHistory>(tx) .expect("failed to collect"), vec![ (ShardedKey::new(address_with_balance, u64::MAX), IntegerList::new([0]).unwrap()), @@ -336,7 +357,7 @@ mod tests { ); assert_eq!( - collect_table_entries::, tables::StorageHistory>(&tx) + collect_table_entries::, tables::StoragesHistory>(tx) .expect("failed to collect"), vec![( StorageShardedKey::new(address_with_storage, storage_key, u64::MAX), diff --git a/crates/node-core/src/node_config.rs b/crates/node-core/src/node_config.rs index b7317b745dd3..1b5a8da2a74a 100644 --- a/crates/node-core/src/node_config.rs +++ b/crates/node-core/src/node_config.rs @@ -57,10 +57,11 @@ use reth_stages::{ stages::{ AccountHashingStage, ExecutionStage, ExecutionStageThresholds, IndexAccountHistoryStage, IndexStorageHistoryStage, MerkleStage, SenderRecoveryStage, StorageHashingStage, - TotalDifficultyStage, TransactionLookupStage, + TransactionLookupStage, }, MetricEvent, }; +use reth_static_file::StaticFileProducer; use reth_tasks::TaskExecutor; use reth_transaction_pool::{ blobstore::{DiskFileBlobStore, DiskFileBlobStoreConfig}, @@ -546,6 +547,7 @@ impl NodeConfig { metrics_tx: reth_stages::MetricEventsSender, prune_config: Option, max_block: Option, + static_file_producer: StaticFileProducer, evm_config: EvmConfig, ) -> eyre::Result> where @@ -573,6 +575,7 @@ impl NodeConfig { self.debug.continuous, metrics_tx, prune_config, + static_file_producer, evm_config, ) .await?; @@ -794,6 +797,7 @@ impl NodeConfig { continuous: bool, metrics_tx: reth_stages::MetricEventsSender, prune_config: Option, + static_file_producer: StaticFileProducer, evm_config: EvmConfig, ) -> eyre::Result> where @@ -843,11 +847,7 @@ impl NodeConfig { header_downloader, body_downloader, factory.clone(), - ) - .set( - TotalDifficultyStage::new(consensus) - .with_commit_threshold(stage_config.total_difficulty.commit_threshold), - ) + )? 
.set(SenderRecoveryStage { commit_threshold: stage_config.sender_recovery.commit_threshold, }) @@ -879,7 +879,7 @@ impl NodeConfig { )) .set(MerkleStage::new_execution(stage_config.merkle.clean_threshold)) .set(TransactionLookupStage::new( - stage_config.transaction_lookup.commit_threshold, + stage_config.transaction_lookup.chunk_size, prune_modes.transaction_lookup, )) .set(IndexAccountHistoryStage::new( @@ -891,7 +891,7 @@ impl NodeConfig { prune_modes.storage_history, )), ) - .build(provider_factory); + .build(provider_factory, static_file_producer); Ok(pipeline) } diff --git a/crates/primitives/Cargo.toml b/crates/primitives/Cargo.toml index 09bb13362dd9..1d7468560206 100644 --- a/crates/primitives/Cargo.toml +++ b/crates/primitives/Cargo.toml @@ -41,7 +41,7 @@ tracing.workspace = true bytes.workspace = true byteorder = "1" clap = { workspace = true, features = ["derive"], optional = true } -derive_more = "0.99" +derive_more.workspace = true itertools.workspace = true modular-bitfield = "0.11.2" num_enum = "0.7" @@ -50,10 +50,10 @@ rayon.workspace = true serde.workspace = true serde_json.workspace = true sha2 = "0.10.7" -sucds = "~0.6" tempfile.workspace = true thiserror.workspace = true zstd = { version = "0.12", features = ["experimental"] } +roaring = "0.10.2" cfg-if = "1.0.0" # `test-utils` feature @@ -83,6 +83,9 @@ triehash = "0.8" hash-db = "~0.15" plain_hasher = "0.2" +sucds = "0.8.1" +anyhow = "1.0.75" + # necessary so we don't hit a "undeclared 'std'": # https://github.com/paradigmxyz/reth/pull/177#discussion_r1021172198 criterion.workspace = true @@ -126,3 +129,7 @@ harness = false name = "trie_root" required-features = ["arbitrary", "test-utils"] harness = false + +[[bench]] +name = "integer_list" +harness = false diff --git a/crates/primitives/benches/integer_list.rs b/crates/primitives/benches/integer_list.rs new file mode 100644 index 000000000000..c07dbaa9d0e7 --- /dev/null +++ b/crates/primitives/benches/integer_list.rs @@ -0,0 +1,250 @@ +#![allow(missing_docs)] +use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; +use rand::prelude::*; + +pub fn new_pre_sorted(c: &mut Criterion) { + let mut group = c.benchmark_group("new_pre_sorted"); + + for delta in [1, 100, 1000, 10000] { + let integers_usize = generate_integers(2000, delta); + assert_eq!(integers_usize.len(), 2000); + + let integers_u64 = integers_usize.iter().map(|v| *v as u64).collect::>(); + assert_eq!(integers_u64.len(), 2000); + + group.bench_function(BenchmarkId::new("Elias-Fano", delta), |b| { + b.iter(|| elias_fano::IntegerList::new_pre_sorted(black_box(&integers_usize))); + }); + + group.bench_function(BenchmarkId::new("Roaring Bitmaps", delta), |b| { + b.iter(|| reth_primitives::IntegerList::new_pre_sorted(black_box(&integers_u64))); + }); + } +} + +pub fn rank_select(c: &mut Criterion) { + let mut group = c.benchmark_group("rank + select"); + + for delta in [1, 100, 1000, 10000] { + let integers_usize = generate_integers(2000, delta); + assert_eq!(integers_usize.len(), 2000); + + let integers_u64 = integers_usize.iter().map(|v| *v as u64).collect::>(); + assert_eq!(integers_u64.len(), 2000); + + group.bench_function(BenchmarkId::new("Elias-Fano", delta), |b| { + b.iter_batched( + || { + let (index, element) = + integers_usize.iter().enumerate().choose(&mut thread_rng()).unwrap(); + (elias_fano::IntegerList::new_pre_sorted(&integers_usize).0, index, *element) + }, + |(list, index, element)| { + let list = list.enable_rank(); + list.rank(element); + 
list.select(index); + }, + BatchSize::PerIteration, + ); + }); + + group.bench_function(BenchmarkId::new("Roaring Bitmaps", delta), |b| { + b.iter_batched( + || { + let (index, element) = + integers_u64.iter().enumerate().choose(&mut thread_rng()).unwrap(); + ( + reth_primitives::IntegerList::new_pre_sorted(&integers_u64), + index as u64, + *element, + ) + }, + |(list, index, element)| { + list.rank(element); + list.select(index); + }, + BatchSize::PerIteration, + ); + }); + } +} + +fn generate_integers(n: usize, delta: usize) -> Vec { + (0..n).fold(Vec::new(), |mut vec, _| { + vec.push(vec.last().map_or(0, |last| { + last + thread_rng().gen_range(delta - delta / 2..=delta + delta / 2) + })); + vec + }) +} + +criterion_group! { + name = benches; + config = Criterion::default(); + targets = new_pre_sorted, rank_select +} +criterion_main!(benches); + +/// Implementation from https://github.com/paradigmxyz/reth/blob/cda5d4e7c53ccc898b7725eb5d3b46c35e4da7f8/crates/primitives/src/integer_list.rs +/// adapted to work with `sucds = "0.8.1"` +#[allow(unused, unreachable_pub)] +mod elias_fano { + use std::{fmt, ops::Deref}; + use sucds::{mii_sequences::EliasFano, Serializable}; + + #[derive(Clone, PartialEq, Eq, Default)] + pub struct IntegerList(pub EliasFano); + + impl Deref for IntegerList { + type Target = EliasFano; + + fn deref(&self) -> &Self::Target { + &self.0 + } + } + + impl fmt::Debug for IntegerList { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let vec: Vec = self.0.iter(0).collect(); + write!(f, "IntegerList {:?}", vec) + } + } + + impl IntegerList { + /// Creates an IntegerList from a list of integers. `usize` is safe to use since + /// [`sucds::EliasFano`] restricts its compilation to 64bits. + /// + /// # Returns + /// + /// Returns an error if the list is empty or not pre-sorted. + pub fn new>(list: T) -> Result { + let mut builder = EliasFanoBuilder::new( + list.as_ref().iter().max().map_or(0, |max| max + 1), + list.as_ref().len(), + )?; + builder.extend(list.as_ref().iter().copied()); + Ok(Self(builder.build())) + } + + // Creates an IntegerList from a pre-sorted list of integers. `usize` is safe to use since + /// [`sucds::EliasFano`] restricts its compilation to 64bits. + /// + /// # Panics + /// + /// Panics if the list is empty or not pre-sorted. + pub fn new_pre_sorted>(list: T) -> Self { + Self::new(list).expect("IntegerList must be pre-sorted and non-empty.") + } + + /// Serializes a [`IntegerList`] into a sequence of bytes. + pub fn to_bytes(&self) -> Vec { + let mut vec = Vec::with_capacity(self.0.size_in_bytes()); + self.0.serialize_into(&mut vec).expect("not able to encode integer list."); + vec + } + + /// Serializes a [`IntegerList`] into a sequence of bytes. + pub fn to_mut_bytes(&self, buf: &mut B) { + let len = self.0.size_in_bytes(); + let mut vec = Vec::with_capacity(len); + self.0.serialize_into(&mut vec).unwrap(); + buf.put_slice(vec.as_slice()); + } + + /// Deserializes a sequence of bytes into a proper [`IntegerList`]. + pub fn from_bytes(data: &[u8]) -> Result { + Ok(Self( + EliasFano::deserialize_from(data).map_err(|_| EliasFanoError::FailedDeserialize)?, + )) + } + } + + macro_rules! 
impl_uint { + ($($w:tt),+) => { + $( + impl From> for IntegerList { + fn from(v: Vec<$w>) -> Self { + let v: Vec = v.iter().map(|v| *v as usize).collect(); + Self::new(v.as_slice()).expect("could not create list.") + } + } + )+ + }; + } + + impl_uint!(usize, u64, u32, u8, u16); + + impl Serialize for IntegerList { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let vec = self.0.iter(0).collect::>(); + let mut seq = serializer.serialize_seq(Some(self.len()))?; + for e in vec { + seq.serialize_element(&e)?; + } + seq.end() + } + } + + struct IntegerListVisitor; + impl<'de> Visitor<'de> for IntegerListVisitor { + type Value = IntegerList; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("a usize array") + } + + fn visit_seq(self, mut seq: E) -> Result + where + E: SeqAccess<'de>, + { + let mut list = Vec::new(); + while let Some(item) = seq.next_element()? { + list.push(item); + } + + IntegerList::new(list) + .map_err(|_| serde::de::Error::invalid_value(Unexpected::Seq, &self)) + } + } + + impl<'de> Deserialize<'de> for IntegerList { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_byte_buf(IntegerListVisitor) + } + } + + #[cfg(any(test, feature = "arbitrary"))] + use arbitrary::{Arbitrary, Unstructured}; + use serde::{ + de::{SeqAccess, Unexpected, Visitor}, + ser::SerializeSeq, + Deserialize, Deserializer, Serialize, Serializer, + }; + use sucds::mii_sequences::EliasFanoBuilder; + + #[cfg(any(test, feature = "arbitrary"))] + impl<'a> Arbitrary<'a> for IntegerList { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + let mut nums: Vec = Vec::arbitrary(u)?; + nums.sort(); + Self::new(&nums).map_err(|_| arbitrary::Error::IncorrectFormat) + } + } + + /// Primitives error type. + #[derive(Debug, thiserror::Error)] + pub enum EliasFanoError { + /// The provided input is invalid. + #[error(transparent)] + InvalidInput(#[from] anyhow::Error), + /// Failed to deserialize data into type. 
+ #[error("failed to deserialize data into type")] + FailedDeserialize, + } +} diff --git a/crates/primitives/src/chain/spec.rs b/crates/primitives/src/chain/spec.rs index 8caaf16cc338..a1b535cafdc9 100644 --- a/crates/primitives/src/chain/spec.rs +++ b/crates/primitives/src/chain/spec.rs @@ -67,7 +67,6 @@ pub static MAINNET: Lazy> = Lazy::new(|| { )), base_fee_params: BaseFeeParamsKind::Constant(BaseFeeParams::ethereum()), prune_delete_limit: 3500, - snapshot_block_interval: 500_000, } .into() }); @@ -111,7 +110,6 @@ pub static GOERLI: Lazy> = Lazy::new(|| { )), base_fee_params: BaseFeeParamsKind::Constant(BaseFeeParams::ethereum()), prune_delete_limit: 1700, - snapshot_block_interval: 1_000_000, } .into() }); @@ -159,7 +157,6 @@ pub static SEPOLIA: Lazy> = Lazy::new(|| { )), base_fee_params: BaseFeeParamsKind::Constant(BaseFeeParams::ethereum()), prune_delete_limit: 1700, - snapshot_block_interval: 1_000_000, } .into() }); @@ -202,7 +199,6 @@ pub static HOLESKY: Lazy> = Lazy::new(|| { )), base_fee_params: BaseFeeParamsKind::Constant(BaseFeeParams::ethereum()), prune_delete_limit: 1700, - snapshot_block_interval: 1_000_000, } .into() }); @@ -296,7 +292,6 @@ pub static BASE_SEPOLIA: Lazy> = Lazy::new(|| { .into(), ), prune_delete_limit: 1700, - snapshot_block_interval: 1_000_000, ..Default::default() } .into() @@ -351,7 +346,6 @@ pub static BASE_MAINNET: Lazy> = Lazy::new(|| { .into(), ), prune_delete_limit: 1700, - snapshot_block_interval: 1_000_000, ..Default::default() } .into() @@ -502,9 +496,6 @@ pub struct ChainSpec { /// data coming in. #[serde(default)] pub prune_delete_limit: usize, - - /// The block interval for creating snapshots. Each snapshot will have that much blocks in it. - pub snapshot_block_interval: u64, } impl Default for ChainSpec { @@ -519,7 +510,6 @@ impl Default for ChainSpec { deposit_contract: Default::default(), base_fee_params: BaseFeeParamsKind::Constant(BaseFeeParams::ethereum()), prune_delete_limit: MAINNET.prune_delete_limit, - snapshot_block_interval: Default::default(), } } } diff --git a/crates/primitives/src/header.rs b/crates/primitives/src/header.rs index 87589a944c6a..5f7c937bcd49 100644 --- a/crates/primitives/src/header.rs +++ b/crates/primitives/src/header.rs @@ -18,6 +18,7 @@ use proptest::prelude::*; use reth_codecs::{add_arbitrary_tests, derive_arbitrary, main_codec, Compact}; use serde::{Deserialize, Serialize}; use std::{mem, ops::Deref}; + /// Errors that can occur during header sanity checks. #[derive(Debug, PartialEq)] pub enum HeaderError { @@ -574,13 +575,14 @@ pub enum HeaderValidationError { /// A [`Header`] that is sealed at a precalculated hash, use [`SealedHeader::unseal()`] if you want /// to modify header. -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] -#[add_arbitrary_tests(rlp)] +#[main_codec(no_arbitrary)] +#[add_arbitrary_tests(rlp, compact)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct SealedHeader { - /// Locked Header fields. - header: Header, /// Locked Header hash. hash: BlockHash, + /// Locked Header fields. 
+ header: Header, } impl SealedHeader { diff --git a/crates/primitives/src/integer_list.rs b/crates/primitives/src/integer_list.rs index abbe6091740a..f53ee41b3941 100644 --- a/crates/primitives/src/integer_list.rs +++ b/crates/primitives/src/integer_list.rs @@ -1,18 +1,19 @@ +use bytes::BufMut; +use roaring::RoaringTreemap; use serde::{ de::{SeqAccess, Unexpected, Visitor}, ser::SerializeSeq, Deserialize, Deserializer, Serialize, Serializer, }; use std::{fmt, ops::Deref}; -use sucds::{EliasFano, Searial}; -/// Uses EliasFano to hold a list of integers. It provides really good compression with the +/// Uses Roaring Bitmaps to hold a list of integers. It provides really good compression with the /// capability to access its elements without decoding it. -#[derive(Clone, PartialEq, Eq, Default)] -pub struct IntegerList(pub EliasFano); +#[derive(Clone, PartialEq, Default)] +pub struct IntegerList(pub RoaringTreemap); impl Deref for IntegerList { - type Target = EliasFano; + type Target = RoaringTreemap; fn deref(&self) -> &Self::Target { &self.0 @@ -21,53 +22,54 @@ impl Deref for IntegerList { impl fmt::Debug for IntegerList { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let vec: Vec = self.0.iter(0).collect(); + let vec: Vec = self.0.iter().collect(); write!(f, "IntegerList {:?}", vec) } } impl IntegerList { - /// Creates an IntegerList from a list of integers. `usize` is safe to use since - /// [`sucds::EliasFano`] restricts its compilation to 64bits. + /// Creates an IntegerList from a list of integers. /// /// # Returns /// /// Returns an error if the list is empty or not pre-sorted. - pub fn new>(list: T) -> Result { - Ok(Self(EliasFano::from_ints(list.as_ref()).map_err(|_| EliasFanoError::InvalidInput)?)) + pub fn new>(list: T) -> Result { + Ok(Self( + RoaringTreemap::from_sorted_iter(list.as_ref().iter().copied()) + .map_err(|_| RoaringBitmapError::InvalidInput)?, + )) } - // Creates an IntegerList from a pre-sorted list of integers. `usize` is safe to use since - /// [`sucds::EliasFano`] restricts its compilation to 64bits. + // Creates an IntegerList from a pre-sorted list of integers. /// /// # Panics /// /// Panics if the list is empty or not pre-sorted. - pub fn new_pre_sorted>(list: T) -> Self { + pub fn new_pre_sorted>(list: T) -> Self { Self( - EliasFano::from_ints(list.as_ref()) - .expect("IntegerList must be pre-sorted and non-empty."), + RoaringTreemap::from_sorted_iter(list.as_ref().iter().copied()) + .expect("IntegerList must be pre-sorted and non-empty"), ) } /// Serializes a [`IntegerList`] into a sequence of bytes. pub fn to_bytes(&self) -> Vec { - let mut vec = Vec::with_capacity(self.0.size_in_bytes()); - self.0.serialize_into(&mut vec).expect("not able to encode integer list."); + let mut vec = Vec::with_capacity(self.0.serialized_size()); + self.0.serialize_into(&mut vec).expect("not able to encode IntegerList"); vec } /// Serializes a [`IntegerList`] into a sequence of bytes. pub fn to_mut_bytes(&self, buf: &mut B) { - let len = self.0.size_in_bytes(); - let mut vec = Vec::with_capacity(len); - self.0.serialize_into(&mut vec).unwrap(); - buf.put_slice(vec.as_slice()); + self.0.serialize_into(buf.writer()).unwrap(); } /// Deserializes a sequence of bytes into a proper [`IntegerList`]. 
- pub fn from_bytes(data: &[u8]) -> Result { - Ok(Self(EliasFano::deserialize_from(data).map_err(|_| EliasFanoError::FailedDeserialize)?)) + pub fn from_bytes(data: &[u8]) -> Result { + Ok(Self( + RoaringTreemap::deserialize_from(data) + .map_err(|_| RoaringBitmapError::FailedToDeserialize)?, + )) } } @@ -76,8 +78,7 @@ macro_rules! impl_uint { $( impl From> for IntegerList { fn from(v: Vec<$w>) -> Self { - let v: Vec = v.iter().map(|v| *v as usize).collect(); - Self(EliasFano::from_ints(v.as_slice()).expect("could not create list.")) + Self::new_pre_sorted(v.iter().map(|v| *v as u64).collect::>()) } } )+ @@ -91,8 +92,8 @@ impl Serialize for IntegerList { where S: Serializer, { - let vec = self.0.iter(0).collect::>(); - let mut seq = serializer.serialize_seq(Some(self.len()))?; + let vec = self.0.iter().collect::>(); + let mut seq = serializer.serialize_seq(Some(self.len() as usize))?; for e in vec { seq.serialize_element(&e)?; } @@ -136,21 +137,21 @@ use arbitrary::{Arbitrary, Unstructured}; #[cfg(any(test, feature = "arbitrary"))] impl<'a> Arbitrary<'a> for IntegerList { fn arbitrary(u: &mut Unstructured<'a>) -> Result { - let mut nums: Vec = Vec::arbitrary(u)?; + let mut nums: Vec = Vec::arbitrary(u)?; nums.sort(); - Ok(Self(EliasFano::from_ints(&nums).map_err(|_| arbitrary::Error::IncorrectFormat)?)) + Self::new(nums).map_err(|_| arbitrary::Error::IncorrectFormat) } } /// Primitives error type. #[derive(Debug, thiserror::Error)] -pub enum EliasFanoError { +pub enum RoaringBitmapError { /// The provided input is invalid. #[error("the provided input is invalid")] InvalidInput, /// Failed to deserialize data into type. #[error("failed to deserialize data into type")] - FailedDeserialize, + FailedToDeserialize, } #[cfg(test)] @@ -161,7 +162,7 @@ mod tests { fn test_integer_list() { let original_list = [1, 2, 3]; let ef_list = IntegerList::new(original_list).unwrap(); - assert_eq!(ef_list.iter(0).collect::>(), original_list); + assert_eq!(ef_list.iter().collect::>(), original_list); } #[test] diff --git a/crates/primitives/src/lib.rs b/crates/primitives/src/lib.rs index b726aa3519f4..72ebcdfc9b0e 100644 --- a/crates/primitives/src/lib.rs +++ b/crates/primitives/src/lib.rs @@ -37,8 +37,8 @@ mod receipt; /// Helpers for working with revm pub mod revm; pub mod serde_helper; -pub mod snapshot; pub mod stage; +pub mod static_file; mod storage; /// Helpers for working with transactions pub mod transaction; @@ -73,11 +73,11 @@ pub use net::{ }; pub use peer::{PeerId, WithPeerId}; pub use prune::{ - PruneCheckpoint, PruneMode, PruneModes, PruneProgress, PruneSegment, PruneSegmentError, - ReceiptsLogPruneConfig, MINIMUM_PRUNING_DISTANCE, + PruneCheckpoint, PruneMode, PruneModes, PruneProgress, PrunePurpose, PruneSegment, + PruneSegmentError, ReceiptsLogPruneConfig, MINIMUM_PRUNING_DISTANCE, }; pub use receipt::{Receipt, ReceiptWithBloom, ReceiptWithBloomRef, Receipts}; -pub use snapshot::SnapshotSegment; +pub use static_file::StaticFileSegment; pub use storage::StorageEntry; #[cfg(feature = "c-kzg")] @@ -92,7 +92,7 @@ pub use transaction::{ AccessList, AccessListItem, FromRecoveredTransaction, IntoRecoveredTransaction, InvalidTransactionError, Signature, Transaction, TransactionKind, TransactionMeta, TransactionSigned, TransactionSignedEcRecovered, TransactionSignedNoHash, TxEip1559, TxEip2930, - TxEip4844, TxHashOrNumber, TxLegacy, TxType, TxValue, EIP1559_TX_TYPE_ID, EIP2930_TX_TYPE_ID, + TxEip4844, TxHashOrNumber, TxLegacy, TxType, EIP1559_TX_TYPE_ID, EIP2930_TX_TYPE_ID, EIP4844_TX_TYPE_ID, 
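For context on the `IntegerList` change above, where the Elias-Fano structure from `sucds` is replaced by a Roaring bitmap, here is a minimal standalone sketch of the `roaring` 0.10 operations the new implementation leans on (`from_sorted_iter`, `serialize_into`/`deserialize_from`, `rank`, `select`). It assumes only the `roaring = "0.10.2"` dependency added in this diff and is an illustration, not reth code:

use roaring::RoaringTreemap;

fn main() {
    // Build from a pre-sorted list, mirroring `IntegerList::new_pre_sorted`.
    let list = RoaringTreemap::from_sorted_iter([1u64, 2, 3, 100, 10_000])
        .expect("input must be sorted");

    // Byte round-trip, mirroring `IntegerList::to_bytes` / `from_bytes`.
    let mut buf = Vec::with_capacity(list.serialized_size());
    list.serialize_into(&mut buf).expect("serialize into Vec<u8>");
    let decoded = RoaringTreemap::deserialize_from(&buf[..]).expect("deserialize from bytes");
    assert!(list == decoded);

    // rank/select, the operations exercised by the new `integer_list` benchmark.
    assert_eq!(decoded.rank(100), 4);       // count of stored values <= 100
    assert_eq!(decoded.select(0), Some(1)); // 0-th smallest stored value
}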
LEGACY_TX_TYPE_ID, }; pub use withdrawal::{Withdrawal, Withdrawals}; diff --git a/crates/primitives/src/proofs.rs b/crates/primitives/src/proofs.rs index 1f7fd26635ff..b42a2654822b 100644 --- a/crates/primitives/src/proofs.rs +++ b/crates/primitives/src/proofs.rs @@ -5,9 +5,8 @@ use crate::{ keccak256, trie::{HashBuilder, Nibbles, TrieAccount}, Address, Header, Receipt, ReceiptWithBloom, ReceiptWithBloomRef, TransactionSigned, Withdrawal, - B256, + B256, U256, }; -use alloy_primitives::U256; use alloy_rlp::Encodable; use bytes::{BufMut, BytesMut}; use itertools::Itertools; diff --git a/crates/primitives/src/prune/mod.rs b/crates/primitives/src/prune/mod.rs index 761440072559..b11aef43263a 100644 --- a/crates/primitives/src/prune/mod.rs +++ b/crates/primitives/src/prune/mod.rs @@ -6,7 +6,7 @@ mod target; use crate::{Address, BlockNumber}; pub use checkpoint::PruneCheckpoint; pub use mode::PruneMode; -pub use segment::{PruneSegment, PruneSegmentError}; +pub use segment::{PrunePurpose, PruneSegment, PruneSegmentError}; use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; pub use target::{PruneModes, MINIMUM_PRUNING_DISTANCE}; @@ -53,7 +53,7 @@ impl ReceiptsLogPruneConfig { // Reminder, that we increment because the [`BlockNumber`] key of the new map should be // viewed as `PruneMode::Before(block)` let block = (pruned_block + 1).max( - mode.prune_target_block(tip, PruneSegment::ContractLogs)? + mode.prune_target_block(tip, PruneSegment::ContractLogs, PrunePurpose::User)? .map(|(block, _)| block) .unwrap_or_default() + 1, @@ -76,7 +76,7 @@ impl ReceiptsLogPruneConfig { for (_, mode) in self.0.iter() { if let PruneMode::Distance(_) = mode { if let Some((block, _)) = - mode.prune_target_block(tip, PruneSegment::ContractLogs)? + mode.prune_target_block(tip, PruneSegment::ContractLogs, PrunePurpose::User)? { lowest = Some(lowest.unwrap_or(u64::MAX).min(block)); } diff --git a/crates/primitives/src/prune/mode.rs b/crates/primitives/src/prune/mode.rs index 2dd04473407b..c32f66d35d1e 100644 --- a/crates/primitives/src/prune/mode.rs +++ b/crates/primitives/src/prune/mode.rs @@ -1,4 +1,4 @@ -use crate::{BlockNumber, PruneSegment, PruneSegmentError}; +use crate::{prune::segment::PrunePurpose, BlockNumber, PruneSegment, PruneSegmentError}; use reth_codecs::{main_codec, Compact}; /// Prune mode. @@ -15,21 +15,29 @@ pub enum PruneMode { } impl PruneMode { + /// Prune blocks up to the specified block number. The specified block number is also pruned. + /// + /// This acts as `PruneMode::Before(block_number + 1)`. + pub fn before_inclusive(block_number: BlockNumber) -> Self { + Self::Before(block_number + 1) + } + /// Returns block up to which variant pruning needs to be done, inclusive, according to the /// provided tip. 
pub fn prune_target_block( &self, tip: BlockNumber, segment: PruneSegment, + purpose: PrunePurpose, ) -> Result, PruneSegmentError> { let result = match self { - PruneMode::Full if segment.min_blocks() == 0 => Some((tip, *self)), + PruneMode::Full if segment.min_blocks(purpose) == 0 => Some((tip, *self)), PruneMode::Distance(distance) if *distance > tip => None, // Nothing to prune yet - PruneMode::Distance(distance) if *distance >= segment.min_blocks() => { + PruneMode::Distance(distance) if *distance >= segment.min_blocks(purpose) => { Some((tip - distance, *self)) } PruneMode::Before(n) if *n > tip => None, // Nothing to prune yet - PruneMode::Before(n) if tip - n >= segment.min_blocks() => Some((n - 1, *self)), + PruneMode::Before(n) if tip - n >= segment.min_blocks(purpose) => Some((n - 1, *self)), _ => return Err(PruneSegmentError::Configuration(segment)), }; Ok(result) @@ -64,7 +72,9 @@ impl Default for PruneMode { #[cfg(test)] mod tests { - use crate::{prune::PruneMode, PruneSegment, PruneSegmentError, MINIMUM_PRUNING_DISTANCE}; + use crate::{ + prune::PruneMode, PrunePurpose, PruneSegment, PruneSegmentError, MINIMUM_PRUNING_DISTANCE, + }; use assert_matches::assert_matches; use serde::Deserialize; @@ -79,8 +89,8 @@ mod tests { // Nothing to prune (PruneMode::Distance(tip + 1), Ok(None)), ( - PruneMode::Distance(segment.min_blocks() + 1), - Ok(Some(tip - (segment.min_blocks() + 1))), + PruneMode::Distance(segment.min_blocks(PrunePurpose::User) + 1), + Ok(Some(tip - (segment.min_blocks(PrunePurpose::User) + 1))), ), // Nothing to prune (PruneMode::Before(tip + 1), Ok(None)), @@ -97,7 +107,7 @@ mod tests { for (index, (mode, expected_result)) in tests.into_iter().enumerate() { assert_eq!( - mode.prune_target_block(tip, segment), + mode.prune_target_block(tip, segment, PrunePurpose::User), expected_result.map(|r| r.map(|b| (b, mode))), "Test {} failed", index + 1, @@ -106,7 +116,7 @@ mod tests { // Test for a scenario where there are no minimum blocks and Full can be used assert_eq!( - PruneMode::Full.prune_target_block(tip, PruneSegment::Transactions), + PruneMode::Full.prune_target_block(tip, PruneSegment::Transactions, PrunePurpose::User), Ok(Some((tip, PruneMode::Full))), ); } diff --git a/crates/primitives/src/prune/segment.rs b/crates/primitives/src/prune/segment.rs index 0806ce909ce5..d88cd0befbee 100644 --- a/crates/primitives/src/prune/segment.rs +++ b/crates/primitives/src/prune/segment.rs @@ -7,19 +7,20 @@ use thiserror::Error; #[main_codec] #[derive(Debug, Display, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)] pub enum PruneSegment { - /// Prune segment responsible for the `TxSenders` table. + /// Prune segment responsible for the `TransactionSenders` table. SenderRecovery, - /// Prune segment responsible for the `TxHashNumber` table. + /// Prune segment responsible for the `TransactionHashNumbers` table. TransactionLookup, /// Prune segment responsible for all rows in `Receipts` table. Receipts, /// Prune segment responsible for some rows in `Receipts` table filtered by logs. ContractLogs, - /// Prune segment responsible for the `AccountChangeSet` and `AccountHistory` tables. + /// Prune segment responsible for the `AccountChangeSets` and `AccountsHistory` tables. AccountHistory, - /// Prune segment responsible for the `StorageChangeSet` and `StorageHistory` tables. + /// Prune segment responsible for the `StorageChangeSets` and `StoragesHistory` tables. StorageHistory, - /// Prune segment responsible for the `CanonicalHeaders`, `Headers` and `HeaderTD` tables. 
+ /// Prune segment responsible for the `CanonicalHeaders`, `Headers` and + /// `HeaderTerminalDifficulties` tables. Headers, /// Prune segment responsible for the `Transactions` table. Transactions, @@ -27,18 +28,41 @@ pub enum PruneSegment { impl PruneSegment { /// Returns minimum number of blocks to left in the database for this segment. - pub fn min_blocks(&self) -> u64 { + pub fn min_blocks(&self, purpose: PrunePurpose) -> u64 { match self { Self::SenderRecovery | Self::TransactionLookup | Self::Headers | Self::Transactions => { 0 } - Self::Receipts | Self::ContractLogs | Self::AccountHistory | Self::StorageHistory => { + Self::Receipts if purpose.is_static_file() => 0, + Self::ContractLogs | Self::AccountHistory | Self::StorageHistory => { MINIMUM_PRUNING_DISTANCE } + Self::Receipts => MINIMUM_PRUNING_DISTANCE, } } } +/// Prune purpose. +#[derive(Debug, Clone, Copy)] +pub enum PrunePurpose { + /// Prune data according to user configuration. + User, + /// Prune data according to highest static_files to delete the data from database. + StaticFile, +} + +impl PrunePurpose { + /// Returns true if the purpose is [`PrunePurpose::User`]. + pub fn is_user(self) -> bool { + matches!(self, Self::User) + } + + /// Returns true if the purpose is [`PrunePurpose::StaticFile`]. + pub fn is_static_file(self) -> bool { + matches!(self, Self::StaticFile) + } +} + /// PruneSegment error type. #[derive(Debug, Error, PartialEq, Eq, Clone)] pub enum PruneSegmentError { diff --git a/crates/primitives/src/revm/env.rs b/crates/primitives/src/revm/env.rs index f8e02a85ecdb..a46eaf5b32a2 100644 --- a/crates/primitives/src/revm/env.rs +++ b/crates/primitives/src/revm/env.rs @@ -200,7 +200,7 @@ where TransactionKind::Call(to) => TransactTo::Call(to), TransactionKind::Create => TransactTo::create(), }; - tx_env.value = tx.value.into(); + tx_env.value = tx.value; tx_env.data = tx.input.clone(); tx_env.chain_id = tx.chain_id; tx_env.nonce = Some(tx.nonce); @@ -216,7 +216,7 @@ where TransactionKind::Call(to) => TransactTo::Call(to), TransactionKind::Create => TransactTo::create(), }; - tx_env.value = tx.value.into(); + tx_env.value = tx.value; tx_env.data = tx.input.clone(); tx_env.chain_id = Some(tx.chain_id); tx_env.nonce = Some(tx.nonce); @@ -239,7 +239,7 @@ where TransactionKind::Call(to) => TransactTo::Call(to), TransactionKind::Create => TransactTo::create(), }; - tx_env.value = tx.value.into(); + tx_env.value = tx.value; tx_env.data = tx.input.clone(); tx_env.chain_id = Some(tx.chain_id); tx_env.nonce = Some(tx.nonce); @@ -262,7 +262,7 @@ where TransactionKind::Call(to) => TransactTo::Call(to), TransactionKind::Create => TransactTo::create(), }; - tx_env.value = tx.value.into(); + tx_env.value = tx.value; tx_env.data = tx.input.clone(); tx_env.chain_id = Some(tx.chain_id); tx_env.nonce = Some(tx.nonce); @@ -287,7 +287,7 @@ where TransactionKind::Call(to) => tx_env.transact_to = TransactTo::Call(to), TransactionKind::Create => tx_env.transact_to = TransactTo::create(), } - tx_env.value = tx.value.into(); + tx_env.value = tx.value; tx_env.data = tx.input.clone(); tx_env.chain_id = None; tx_env.nonce = None; diff --git a/crates/primitives/src/snapshot/mod.rs b/crates/primitives/src/snapshot/mod.rs deleted file mode 100644 index bbfcec1d2885..000000000000 --- a/crates/primitives/src/snapshot/mod.rs +++ /dev/null @@ -1,47 +0,0 @@ -//! Snapshot primitives. 
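To make the purpose-aware pruning rules above concrete, a small standalone model of how `PruneMode::Before` and `PrunePurpose` interact for the Receipts segment. The names are hypothetical and the minimum-distance constant is an assumed placeholder, not reth's actual value; only the control flow mirrors the match arms shown in this diff:

// Standalone model of the purpose-aware pruning rules in the diff above.
// MIN_DISTANCE is an illustrative assumption, not reth's constant.
const MIN_DISTANCE: u64 = 128;

#[derive(Clone, Copy)]
enum Purpose { User, StaticFile }

// Receipts keep no minimum when pruned on behalf of static files,
// but keep MIN_DISTANCE blocks when pruned by user configuration.
fn receipts_min_blocks(purpose: Purpose) -> u64 {
    match purpose { Purpose::StaticFile => 0, Purpose::User => MIN_DISTANCE }
}

// `Before(n)` prunes up to and including block `n - 1`, so `before_inclusive(b)`
// is spelled as `Before(b + 1)`. Ok(None) means nothing to prune yet; Err(())
// stands in for the configuration error returned when the segment minimum is violated.
fn prune_target_before(n: u64, tip: u64, min_blocks: u64) -> Result<Option<u64>, ()> {
    if n > tip {
        Ok(None)
    } else if tip - n >= min_blocks {
        Ok(Some(n - 1))
    } else {
        Err(())
    }
}

fn main() {
    let tip = 10_000;
    // Static-file-driven pruning has no minimum distance for receipts.
    assert_eq!(
        prune_target_before(10_000, tip, receipts_min_blocks(Purpose::StaticFile)),
        Ok(Some(9_999))
    );
    // The same mode fails for user-driven pruning, since it would keep
    // fewer than MIN_DISTANCE blocks in the database.
    assert!(prune_target_before(10_000, tip, receipts_min_blocks(Purpose::User)).is_err());
}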
- -mod compression; -mod filters; -mod segment; - -use alloy_primitives::BlockNumber; -pub use compression::Compression; -pub use filters::{Filters, InclusionFilter, PerfectHashingFunction}; -pub use segment::{SegmentConfig, SegmentHeader, SnapshotSegment}; - -/// Default snapshot block count. -pub const BLOCKS_PER_SNAPSHOT: u64 = 500_000; - -/// Highest snapshotted block numbers, per data part. -#[derive(Debug, Clone, Copy, Default, Eq, PartialEq)] -pub struct HighestSnapshots { - /// Highest snapshotted block of headers, inclusive. - /// If [`None`], no snapshot is available. - pub headers: Option, - /// Highest snapshotted block of receipts, inclusive. - /// If [`None`], no snapshot is available. - pub receipts: Option, - /// Highest snapshotted block of transactions, inclusive. - /// If [`None`], no snapshot is available. - pub transactions: Option, -} - -impl HighestSnapshots { - /// Returns the highest snapshot if it exists for a segment - pub fn highest(&self, segment: SnapshotSegment) -> Option { - match segment { - SnapshotSegment::Headers => self.headers, - SnapshotSegment::Transactions => self.transactions, - SnapshotSegment::Receipts => self.receipts, - } - } - - /// Returns a mutable reference to a snapshot segment - pub fn as_mut(&mut self, segment: SnapshotSegment) -> &mut Option { - match segment { - SnapshotSegment::Headers => &mut self.headers, - SnapshotSegment::Transactions => &mut self.transactions, - SnapshotSegment::Receipts => &mut self.receipts, - } - } -} diff --git a/crates/primitives/src/snapshot/segment.rs b/crates/primitives/src/snapshot/segment.rs deleted file mode 100644 index 931db830ad70..000000000000 --- a/crates/primitives/src/snapshot/segment.rs +++ /dev/null @@ -1,287 +0,0 @@ -use crate::{ - snapshot::{Compression, Filters, InclusionFilter}, - BlockNumber, TxNumber, -}; -use derive_more::Display; -use serde::{Deserialize, Serialize}; -use std::{ffi::OsStr, ops::RangeInclusive, str::FromStr}; -use strum::{AsRefStr, EnumIter, EnumString}; - -#[derive( - Debug, - Copy, - Clone, - Eq, - PartialEq, - Hash, - Ord, - PartialOrd, - Deserialize, - Serialize, - EnumString, - EnumIter, - AsRefStr, - Display, -)] -#[cfg_attr(feature = "clap", derive(clap::ValueEnum))] -/// Segment of the data that can be snapshotted. -pub enum SnapshotSegment { - #[strum(serialize = "headers")] - /// Snapshot segment responsible for the `CanonicalHeaders`, `Headers`, `HeaderTD` tables. - Headers, - #[strum(serialize = "transactions")] - /// Snapshot segment responsible for the `Transactions` table. - Transactions, - #[strum(serialize = "receipts")] - /// Snapshot segment responsible for the `Receipts` table. - Receipts, -} - -impl SnapshotSegment { - /// Returns the default configuration of the segment. - pub const fn config(&self) -> SegmentConfig { - let default_config = SegmentConfig { - filters: Filters::WithFilters( - InclusionFilter::Cuckoo, - super::PerfectHashingFunction::Fmph, - ), - compression: Compression::Lz4, - }; - - match self { - SnapshotSegment::Headers => default_config, - SnapshotSegment::Transactions => default_config, - SnapshotSegment::Receipts => default_config, - } - } - - /// Returns the default file name for the provided segment and range. - pub fn filename( - &self, - block_range: &RangeInclusive, - tx_range: &RangeInclusive, - ) -> String { - // ATTENTION: if changing the name format, be sure to reflect those changes in - // [`Self::parse_filename`]. 
- format!( - "snapshot_{}_{}_{}_{}_{}", - self.as_ref(), - block_range.start(), - block_range.end(), - tx_range.start(), - tx_range.end(), - ) - } - - /// Returns file name for the provided segment and range, alongisde filters, compression. - pub fn filename_with_configuration( - &self, - filters: Filters, - compression: Compression, - block_range: &RangeInclusive, - tx_range: &RangeInclusive, - ) -> String { - let prefix = self.filename(block_range, tx_range); - - let filters_name = match filters { - Filters::WithFilters(inclusion_filter, phf) => { - format!("{}-{}", inclusion_filter.as_ref(), phf.as_ref()) - } - Filters::WithoutFilters => "none".to_string(), - }; - - // ATTENTION: if changing the name format, be sure to reflect those changes in - // [`Self::parse_filename`.] - format!("{prefix}_{}_{}", filters_name, compression.as_ref()) - } - - /// Parses a filename into a `SnapshotSegment` and its corresponding block and transaction - /// ranges. - /// - /// The filename is expected to follow the format: - /// "snapshot_{segment}_{block_start}_{block_end}_{tx_start}_{tx_end}". This function checks - /// for the correct prefix ("snapshot"), and then parses the segment and the inclusive - /// ranges for blocks and transactions. It ensures that the start of each range is less than the - /// end. - /// - /// # Returns - /// - `Some((segment, block_range, tx_range))` if parsing is successful and all conditions are - /// met. - /// - `None` if any condition fails, such as an incorrect prefix, parsing error, or invalid - /// range. - /// - /// # Note - /// This function is tightly coupled with the naming convention defined in [`Self::filename`]. - /// Any changes in the filename format in `filename` should be reflected here. - pub fn parse_filename( - name: &OsStr, - ) -> Option<(Self, RangeInclusive, RangeInclusive)> { - let mut parts = name.to_str()?.split('_'); - if parts.next() != Some("snapshot") { - return None - } - - let segment = Self::from_str(parts.next()?).ok()?; - let (block_start, block_end) = (parts.next()?.parse().ok()?, parts.next()?.parse().ok()?); - let (tx_start, tx_end) = (parts.next()?.parse().ok()?, parts.next()?.parse().ok()?); - - if block_start >= block_end || tx_start > tx_end { - return None - } - - Some((segment, block_start..=block_end, tx_start..=tx_end)) - } -} - -/// A segment header that contains information common to all segments. Used for storage. -#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)] -pub struct SegmentHeader { - /// Block range of the snapshot segment - block_range: RangeInclusive, - /// Transaction range of the snapshot segment - tx_range: RangeInclusive, - /// Segment type - segment: SnapshotSegment, -} - -impl SegmentHeader { - /// Returns [`SegmentHeader`]. - pub fn new( - block_range: RangeInclusive, - tx_range: RangeInclusive, - segment: SnapshotSegment, - ) -> Self { - Self { block_range, tx_range, segment } - } - - /// Returns the transaction range. - pub fn tx_range(&self) -> &RangeInclusive { - &self.tx_range - } - - /// Returns the block range. - pub fn block_range(&self) -> &RangeInclusive { - &self.block_range - } - - /// Returns the first block number of the segment. - pub fn block_start(&self) -> BlockNumber { - *self.block_range.start() - } - - /// Returns the last block number of the segment. - pub fn block_end(&self) -> BlockNumber { - *self.block_range.end() - } - - /// Returns the first transaction number of the segment. 
- pub fn tx_start(&self) -> TxNumber { - *self.tx_range.start() - } - - /// Returns the row offset which depends on whether the segment is block or transaction based. - pub fn start(&self) -> u64 { - match self.segment { - SnapshotSegment::Headers => self.block_start(), - SnapshotSegment::Transactions | SnapshotSegment::Receipts => self.tx_start(), - } - } -} - -/// Configuration used on the segment. -#[derive(Debug, Clone, Copy)] -pub struct SegmentConfig { - /// Inclusion filters used on the segment - pub filters: Filters, - /// Compression used on the segment - pub compression: Compression, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_filename() { - let test_vectors = [ - (SnapshotSegment::Headers, 2..=30, 0..=1, "snapshot_headers_2_30_0_1", None), - ( - SnapshotSegment::Receipts, - 30..=300, - 110..=1000, - "snapshot_receipts_30_300_110_1000", - None, - ), - ( - SnapshotSegment::Transactions, - 1_123_233..=11_223_233, - 1_123_233..=2_123_233, - "snapshot_transactions_1123233_11223233_1123233_2123233", - None, - ), - ( - SnapshotSegment::Headers, - 2..=30, - 0..=1, - "snapshot_headers_2_30_0_1_cuckoo-fmph_lz4", - Some(( - Compression::Lz4, - Filters::WithFilters( - InclusionFilter::Cuckoo, - crate::snapshot::PerfectHashingFunction::Fmph, - ), - )), - ), - ( - SnapshotSegment::Headers, - 2..=30, - 0..=1, - "snapshot_headers_2_30_0_1_cuckoo-fmph_zstd", - Some(( - Compression::Zstd, - Filters::WithFilters( - InclusionFilter::Cuckoo, - crate::snapshot::PerfectHashingFunction::Fmph, - ), - )), - ), - ( - SnapshotSegment::Headers, - 2..=30, - 0..=1, - "snapshot_headers_2_30_0_1_cuckoo-fmph_zstd-dict", - Some(( - Compression::ZstdWithDictionary, - Filters::WithFilters( - InclusionFilter::Cuckoo, - crate::snapshot::PerfectHashingFunction::Fmph, - ), - )), - ), - ]; - - for (segment, block_range, tx_range, filename, configuration) in test_vectors { - if let Some((compression, filters)) = configuration { - assert_eq!( - segment.filename_with_configuration( - filters, - compression, - &block_range, - &tx_range - ), - filename - ); - } else { - assert_eq!(segment.filename(&block_range, &tx_range), filename); - } - - assert_eq!( - SnapshotSegment::parse_filename(OsStr::new(filename)), - Some((segment, block_range, tx_range)) - ); - } - - assert_eq!(SnapshotSegment::parse_filename(OsStr::new("snapshot_headers_2_30_3_2")), None); - assert_eq!(SnapshotSegment::parse_filename(OsStr::new("snapshot_headers_2_30_1")), None); - } -} diff --git a/crates/primitives/src/stage/checkpoints.rs b/crates/primitives/src/stage/checkpoints.rs index b238c77e19d3..c0cace519cbc 100644 --- a/crates/primitives/src/stage/checkpoints.rs +++ b/crates/primitives/src/stage/checkpoints.rs @@ -2,9 +2,8 @@ use crate::{ trie::{hash_builder::HashBuilderState, StoredSubNode}, Address, BlockNumber, B256, }; -use bytes::{Buf, BufMut}; -use reth_codecs::{derive_arbitrary, main_codec, Compact}; -use serde::{Deserialize, Serialize}; +use bytes::Buf; +use reth_codecs::{main_codec, Compact}; use std::ops::RangeInclusive; /// Saves the progress of Merkle stage. @@ -14,9 +13,6 @@ pub struct MerkleCheckpoint { pub target_block: BlockNumber, /// The last hashed account key processed. pub last_account_key: B256, - // TODO: remove in the next breaking release. - /// The last walker key processed. - pub last_walker_key: Vec, /// Previously recorded walker stack. pub walker_stack: Vec, /// The hash builder state. 
@@ -31,13 +27,7 @@ impl MerkleCheckpoint { walker_stack: Vec, state: HashBuilderState, ) -> Self { - Self { - target_block, - last_account_key, - walker_stack, - state, - last_walker_key: Vec::default(), - } + Self { target_block, last_account_key, walker_stack, state } } } @@ -54,10 +44,6 @@ impl Compact for MerkleCheckpoint { buf.put_slice(self.last_account_key.as_slice()); len += self.last_account_key.len(); - buf.put_u16(self.last_walker_key.len() as u16); - buf.put_slice(&self.last_walker_key[..]); - len += 2 + self.last_walker_key.len(); - buf.put_u16(self.walker_stack.len() as u16); len += 2; for item in self.walker_stack.into_iter() { @@ -74,10 +60,6 @@ impl Compact for MerkleCheckpoint { let last_account_key = B256::from_slice(&buf[..32]); buf.advance(32); - let last_walker_key_len = buf.get_u16() as usize; - let last_walker_key = Vec::from(&buf[..last_walker_key_len]); - buf.advance(last_walker_key_len); - let walker_stack_len = buf.get_u16() as usize; let mut walker_stack = Vec::with_capacity(walker_stack_len); for _ in 0..walker_stack_len { @@ -87,16 +69,7 @@ impl Compact for MerkleCheckpoint { } let (state, buf) = HashBuilderState::from_compact(buf, 0); - ( - MerkleCheckpoint { - target_block, - last_account_key, - last_walker_key, - walker_stack, - state, - }, - buf, - ) + (MerkleCheckpoint { target_block, last_account_key, walker_stack, state }, buf) } } @@ -250,8 +223,8 @@ impl StageCheckpoint { // TODO(alexey): add a merkle checkpoint. Currently it's hard because [`MerkleCheckpoint`] // is not a Copy type. /// Stage-specific checkpoint metrics. -#[derive_arbitrary(compact)] -#[derive(Debug, PartialEq, Eq, Clone, Copy, Serialize, Deserialize)] +#[main_codec] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub enum StageUnitCheckpoint { /// Saves the progress of AccountHashing stage. Account(AccountHashingCheckpoint), @@ -267,39 +240,16 @@ pub enum StageUnitCheckpoint { IndexHistory(IndexHistoryCheckpoint), } -/// Generates: -/// 1. [Compact::to_compact] and [Compact::from_compact] implementations for [StageUnitCheckpoint]. -/// 2. [StageCheckpoint] getter and builder methods. +#[cfg(test)] +impl Default for StageUnitCheckpoint { + fn default() -> Self { + Self::Account(AccountHashingCheckpoint::default()) + } +} + +/// Generates [StageCheckpoint] getter and builder methods. macro_rules! 
stage_unit_checkpoints { ($(($index:expr,$enum_variant:tt,$checkpoint_ty:ty,#[doc = $fn_get_doc:expr]$fn_get_name:ident,#[doc = $fn_build_doc:expr]$fn_build_name:ident)),+) => { - impl Compact for StageUnitCheckpoint { - fn to_compact(self, buf: &mut B) -> usize - where - B: BufMut + AsMut<[u8]>, - { - match self { - $( - StageUnitCheckpoint::$enum_variant(data) => { - buf.put_u8($index); - 1 + data.to_compact(buf) - } - )+ - } - } - - fn from_compact(buf: &[u8], _len: usize) -> (Self, &[u8]) { - match buf[0] { - $( - $index => { - let (data, buf) = <$checkpoint_ty>::from_compact(&buf[1..], buf.len() - 1); - (Self::$enum_variant(data), buf) - } - )+ - _ => unreachable!("Junk data in database: unknown StageUnitCheckpoint variant"), - } - } - } - impl StageCheckpoint { $( #[doc = $fn_get_doc] @@ -391,7 +341,6 @@ mod tests { let checkpoint = MerkleCheckpoint { target_block: rng.gen(), last_account_key: rng.gen(), - last_walker_key: B256::random_with(&mut rng).to_vec(), walker_stack: vec![StoredSubNode { key: B256::random_with(&mut rng).to_vec(), nibble: Some(rng.gen()), @@ -405,53 +354,4 @@ mod tests { let (decoded, _) = MerkleCheckpoint::from_compact(&buf, encoded); assert_eq!(decoded, checkpoint); } - - #[test] - fn stage_unit_checkpoint_roundtrip() { - let mut rng = rand::thread_rng(); - let checkpoints = vec![ - StageUnitCheckpoint::Account(AccountHashingCheckpoint { - address: Some(rng.gen()), - block_range: CheckpointBlockRange { from: rng.gen(), to: rng.gen() }, - progress: EntitiesCheckpoint { - processed: rng.gen::() as u64, - total: u32::MAX as u64 + rng.gen::(), - }, - }), - StageUnitCheckpoint::Storage(StorageHashingCheckpoint { - address: Some(rng.gen()), - storage: Some(rng.gen()), - block_range: CheckpointBlockRange { from: rng.gen(), to: rng.gen() }, - progress: EntitiesCheckpoint { - processed: rng.gen::() as u64, - total: u32::MAX as u64 + rng.gen::(), - }, - }), - StageUnitCheckpoint::Entities(EntitiesCheckpoint { - processed: rng.gen::() as u64, - total: u32::MAX as u64 + rng.gen::(), - }), - StageUnitCheckpoint::Execution(ExecutionCheckpoint { - block_range: CheckpointBlockRange { from: rng.gen(), to: rng.gen() }, - progress: EntitiesCheckpoint { - processed: rng.gen::() as u64, - total: u32::MAX as u64 + rng.gen::(), - }, - }), - StageUnitCheckpoint::Headers(HeadersCheckpoint { - block_range: CheckpointBlockRange { from: rng.gen(), to: rng.gen() }, - progress: EntitiesCheckpoint { - processed: rng.gen::() as u64, - total: u32::MAX as u64 + rng.gen::(), - }, - }), - ]; - - for checkpoint in checkpoints { - let mut buf = Vec::new(); - let encoded = checkpoint.to_compact(&mut buf); - let (decoded, _) = StageUnitCheckpoint::from_compact(&buf, encoded); - assert_eq!(decoded, checkpoint); - } - } } diff --git a/crates/primitives/src/stage/id.rs b/crates/primitives/src/stage/id.rs index ac44a5e46d9f..df92bd112c71 100644 --- a/crates/primitives/src/stage/id.rs +++ b/crates/primitives/src/stage/id.rs @@ -3,10 +3,10 @@ /// For custom stages, use [`StageId::Other`] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub enum StageId { + /// Static File stage in the process. + StaticFile, /// Header stage in the process. Headers, - /// Total difficulty stage in the process. - TotalDifficulty, /// Bodies stage in the process. Bodies, /// Sender recovery stage in the process. 
@@ -36,8 +36,8 @@ pub enum StageId { impl StageId { /// All supported Stages pub const ALL: [StageId; 13] = [ + StageId::StaticFile, StageId::Headers, - StageId::TotalDifficulty, StageId::Bodies, StageId::SenderRecovery, StageId::Execution, @@ -54,8 +54,8 @@ impl StageId { /// Return stage id formatted as string. pub fn as_str(&self) -> &str { match self { + StageId::StaticFile => "StaticFile", StageId::Headers => "Headers", - StageId::TotalDifficulty => "TotalDifficulty", StageId::Bodies => "Bodies", StageId::SenderRecovery => "SenderRecovery", StageId::Execution => "Execution", @@ -94,8 +94,8 @@ mod tests { #[test] fn stage_id_as_string() { + assert_eq!(StageId::StaticFile.to_string(), "StaticFile"); assert_eq!(StageId::Headers.to_string(), "Headers"); - assert_eq!(StageId::TotalDifficulty.to_string(), "TotalDifficulty"); assert_eq!(StageId::Bodies.to_string(), "Bodies"); assert_eq!(StageId::SenderRecovery.to_string(), "SenderRecovery"); assert_eq!(StageId::Execution.to_string(), "Execution"); diff --git a/crates/primitives/src/snapshot/compression.rs b/crates/primitives/src/static_file/compression.rs similarity index 86% rename from crates/primitives/src/snapshot/compression.rs rename to crates/primitives/src/static_file/compression.rs index 2d5599c2cda9..f1a64a501403 100644 --- a/crates/primitives/src/snapshot/compression.rs +++ b/crates/primitives/src/static_file/compression.rs @@ -1,6 +1,6 @@ use strum::AsRefStr; -/// Snapshot compression types. +/// Static File compression types. #[derive(Debug, Copy, Clone, Default, AsRefStr)] #[cfg_attr(feature = "clap", derive(clap::ValueEnum))] pub enum Compression { @@ -13,7 +13,7 @@ pub enum Compression { /// Zstandard (Zstd) compression algorithm with a dictionary. #[strum(serialize = "zstd-dict")] ZstdWithDictionary, - /// No compression, uncompressed snapshot. + /// No compression. #[strum(serialize = "uncompressed")] #[default] Uncompressed, diff --git a/crates/primitives/src/snapshot/filters.rs b/crates/primitives/src/static_file/filters.rs similarity index 71% rename from crates/primitives/src/snapshot/filters.rs rename to crates/primitives/src/static_file/filters.rs index 3443d474706e..cc844468e545 100644 --- a/crates/primitives/src/snapshot/filters.rs +++ b/crates/primitives/src/static_file/filters.rs @@ -1,16 +1,16 @@ use strum::AsRefStr; #[derive(Debug, Copy, Clone)] -/// Snapshot filters. +/// Static File filters. pub enum Filters { - /// Snapshot uses filters with [InclusionFilter] and [PerfectHashingFunction]. + /// Static File uses filters with [InclusionFilter] and [PerfectHashingFunction]. WithFilters(InclusionFilter, PerfectHashingFunction), - /// Snapshot doesn't use any filters. + /// Static File doesn't use any filters. WithoutFilters, } impl Filters { - /// Returns `true` if snapshot uses filters. + /// Returns `true` if static file uses filters. pub const fn has_filters(&self) -> bool { matches!(self, Self::WithFilters(_, _)) } @@ -18,7 +18,7 @@ impl Filters { #[derive(Debug, Copy, Clone, AsRefStr)] #[cfg_attr(feature = "clap", derive(clap::ValueEnum))] -/// Snapshot inclusion filter. Also see [Filters]. +/// Static File inclusion filter. Also see [Filters]. pub enum InclusionFilter { #[strum(serialize = "cuckoo")] /// Cuckoo filter @@ -27,7 +27,7 @@ pub enum InclusionFilter { #[derive(Debug, Copy, Clone, AsRefStr)] #[cfg_attr(feature = "clap", derive(clap::ValueEnum))] -/// Snapshot perfect hashing function. Also see [Filters]. +/// Static File perfect hashing function. Also see [Filters]. 
pub enum PerfectHashingFunction { #[strum(serialize = "fmph")] /// Fingerprint-Based Minimal Perfect Hash Function diff --git a/crates/primitives/src/static_file/mod.rs b/crates/primitives/src/static_file/mod.rs new file mode 100644 index 000000000000..fe15bd1c759a --- /dev/null +++ b/crates/primitives/src/static_file/mod.rs @@ -0,0 +1,54 @@ +//! StaticFile primitives. + +mod compression; +mod filters; +mod segment; + +use alloy_primitives::BlockNumber; +pub use compression::Compression; +pub use filters::{Filters, InclusionFilter, PerfectHashingFunction}; +pub use segment::{SegmentConfig, SegmentHeader, SegmentRangeInclusive, StaticFileSegment}; + +/// Default static file block count. +pub const BLOCKS_PER_STATIC_FILE: u64 = 500_000; + +/// Highest static file block numbers, per data part. +#[derive(Debug, Clone, Copy, Default, Eq, PartialEq)] +pub struct HighestStaticFiles { + /// Highest static file block of headers, inclusive. + /// If [`None`], no static file is available. + pub headers: Option, + /// Highest static file block of receipts, inclusive. + /// If [`None`], no static file is available. + pub receipts: Option, + /// Highest static file block of transactions, inclusive. + /// If [`None`], no static file is available. + pub transactions: Option, +} + +impl HighestStaticFiles { + /// Returns the highest static file if it exists for a segment + pub fn highest(&self, segment: StaticFileSegment) -> Option { + match segment { + StaticFileSegment::Headers => self.headers, + StaticFileSegment::Transactions => self.transactions, + StaticFileSegment::Receipts => self.receipts, + } + } + + /// Returns a mutable reference to a static file segment + pub fn as_mut(&mut self, segment: StaticFileSegment) -> &mut Option { + match segment { + StaticFileSegment::Headers => &mut self.headers, + StaticFileSegment::Transactions => &mut self.transactions, + StaticFileSegment::Receipts => &mut self.receipts, + } + } +} + +/// Each static file has a fixed number of blocks. This gives out the range where the requested +/// block is positioned. Used for segment filename. +pub fn find_fixed_range(block: BlockNumber) -> SegmentRangeInclusive { + let start = (block / BLOCKS_PER_STATIC_FILE) * BLOCKS_PER_STATIC_FILE; + SegmentRangeInclusive::new(start, start + BLOCKS_PER_STATIC_FILE - 1) +} diff --git a/crates/primitives/src/static_file/segment.rs b/crates/primitives/src/static_file/segment.rs new file mode 100644 index 000000000000..8f9e3e08af2b --- /dev/null +++ b/crates/primitives/src/static_file/segment.rs @@ -0,0 +1,435 @@ +use crate::{ + static_file::{Compression, Filters, InclusionFilter}, + BlockNumber, TxNumber, +}; +use derive_more::Display; +use serde::{Deserialize, Serialize}; +use std::{ops::RangeInclusive, str::FromStr}; +use strum::{AsRefStr, EnumIter, EnumString}; + +#[derive( + Debug, + Copy, + Clone, + Eq, + PartialEq, + Hash, + Ord, + PartialOrd, + Deserialize, + Serialize, + EnumString, + EnumIter, + AsRefStr, + Display, +)] +#[cfg_attr(feature = "clap", derive(clap::ValueEnum))] +/// Segment of the data that can be moved to static files. +pub enum StaticFileSegment { + #[strum(serialize = "headers")] + /// Static File segment responsible for the `CanonicalHeaders`, `Headers`, + /// `HeaderTerminalDifficulties` tables. + Headers, + #[strum(serialize = "transactions")] + /// Static File segment responsible for the `Transactions` table. + Transactions, + #[strum(serialize = "receipts")] + /// Static File segment responsible for the `Receipts` table. 
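To make the fixed-range arithmetic in the new `static_file/mod.rs` above concrete, here is a small illustrative sketch (the block number is arbitrary; it assumes the default `BLOCKS_PER_STATIC_FILE` of 500_000 defined above):

    // Integer division snaps the block to the start of its static file,
    // and each file then covers BLOCKS_PER_STATIC_FILE blocks inclusively.
    let block = 1_234_567u64;
    let range = find_fixed_range(block);
    assert_eq!(range.start(), 1_000_000); // (1_234_567 / 500_000) * 500_000
    assert_eq!(range.end(), 1_499_999);   // start + BLOCKS_PER_STATIC_FILE - 1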
+ Receipts, +} + +impl StaticFileSegment { + /// Returns the segment as a string. + pub const fn as_str(&self) -> &'static str { + match self { + StaticFileSegment::Headers => "headers", + StaticFileSegment::Transactions => "transactions", + StaticFileSegment::Receipts => "receipts", + } + } + + /// Returns the default configuration of the segment. + pub const fn config(&self) -> SegmentConfig { + let default_config = SegmentConfig { + filters: Filters::WithFilters( + InclusionFilter::Cuckoo, + super::PerfectHashingFunction::Fmph, + ), + compression: Compression::Lz4, + }; + + match self { + StaticFileSegment::Headers => default_config, + StaticFileSegment::Transactions => default_config, + StaticFileSegment::Receipts => default_config, + } + } + + /// Returns the number of columns for the segment + pub const fn columns(&self) -> usize { + match self { + StaticFileSegment::Headers => 3, + StaticFileSegment::Transactions => 1, + StaticFileSegment::Receipts => 1, + } + } + + /// Returns the default file name for the provided segment and range. + pub fn filename(&self, block_range: &SegmentRangeInclusive) -> String { + // ATTENTION: if changing the name format, be sure to reflect those changes in + // [`Self::parse_filename`]. + format!("static_file_{}_{}_{}", self.as_ref(), block_range.start(), block_range.end()) + } + + /// Returns file name for the provided segment and range, alongisde filters, compression. + pub fn filename_with_configuration( + &self, + filters: Filters, + compression: Compression, + block_range: &SegmentRangeInclusive, + ) -> String { + let prefix = self.filename(block_range); + + let filters_name = match filters { + Filters::WithFilters(inclusion_filter, phf) => { + format!("{}-{}", inclusion_filter.as_ref(), phf.as_ref()) + } + Filters::WithoutFilters => "none".to_string(), + }; + + // ATTENTION: if changing the name format, be sure to reflect those changes in + // [`Self::parse_filename`.] + format!("{prefix}_{}_{}", filters_name, compression.as_ref()) + } + + /// Parses a filename into a `StaticFileSegment` and its expected block range. + /// + /// The filename is expected to follow the format: + /// "static_file_{segment}_{block_start}_{block_end}". This function checks + /// for the correct prefix ("static_file"), and then parses the segment and the inclusive + /// ranges for blocks. It ensures that the start of each range is less than or equal to the + /// end. + /// + /// # Returns + /// - `Some((segment, block_range))` if parsing is successful and all conditions are met. + /// - `None` if any condition fails, such as an incorrect prefix, parsing error, or invalid + /// range. + /// + /// # Note + /// This function is tightly coupled with the naming convention defined in [`Self::filename`]. + /// Any changes in the filename format in `filename` should be reflected here. + pub fn parse_filename(name: &str) -> Option<(Self, SegmentRangeInclusive)> { + let mut parts = name.split('_'); + if !(parts.next() == Some("static") && parts.next() == Some("file")) { + return None + } + + let segment = Self::from_str(parts.next()?).ok()?; + let (block_start, block_end) = (parts.next()?.parse().ok()?, parts.next()?.parse().ok()?); + + if block_start > block_end { + return None + } + + Some((segment, SegmentRangeInclusive::new(block_start, block_end))) + } + + /// Returns `true` if the segment is `StaticFileSegment::Headers`. 
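As a quick sanity check of the naming scheme defined just above (it mirrors the `test_filename` vectors later in this file), the default filename and its parser round-trip; the concrete range is only illustrative:

    let range = SegmentRangeInclusive::new(2, 30);
    let name = StaticFileSegment::Headers.filename(&range);
    assert_eq!(name, "static_file_headers_2_30");
    // parse_filename splits on '_', checks the "static"/"file" prefix parts,
    // then reads the segment kind and the inclusive block bounds back out.
    assert_eq!(StaticFileSegment::parse_filename(&name), Some((StaticFileSegment::Headers, range)));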
+ pub fn is_headers(&self) -> bool { + matches!(self, StaticFileSegment::Headers) + } +} + +/// A segment header that contains information common to all segments. Used for storage. +#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash, Clone)] +pub struct SegmentHeader { + /// Defines the expected block range for a static file segment. This attribute is crucial for + /// scenarios where the file contains no data, allowing for a representation beyond a + /// simple `start..=start` range. It ensures clarity in differentiating between an empty file + /// and a file with a single block numbered 0. + expected_block_range: SegmentRangeInclusive, + /// Block range of data on the static file segment + block_range: Option, + /// Transaction range of data of the static file segment + tx_range: Option, + /// Segment type + segment: StaticFileSegment, +} + +impl SegmentHeader { + /// Returns [`SegmentHeader`]. + pub fn new( + expected_block_range: SegmentRangeInclusive, + block_range: Option, + tx_range: Option, + segment: StaticFileSegment, + ) -> Self { + Self { expected_block_range, block_range, tx_range, segment } + } + + /// Returns the static file segment kind. + pub fn segment(&self) -> StaticFileSegment { + self.segment + } + + /// Returns the block range. + pub fn block_range(&self) -> Option<&SegmentRangeInclusive> { + self.block_range.as_ref() + } + + /// Returns the transaction range. + pub fn tx_range(&self) -> Option<&SegmentRangeInclusive> { + self.tx_range.as_ref() + } + + /// The expected block start of the segment. + pub fn expected_block_start(&self) -> BlockNumber { + self.expected_block_range.start() + } + + /// The expected block end of the segment. + pub fn expected_block_end(&self) -> BlockNumber { + self.expected_block_range.end() + } + + /// Returns the first block number of the segment. + pub fn block_start(&self) -> Option { + self.block_range.as_ref().map(|b| b.start()) + } + + /// Returns the last block number of the segment. + pub fn block_end(&self) -> Option { + self.block_range.as_ref().map(|b| b.end()) + } + + /// Returns the first transaction number of the segment. + pub fn tx_start(&self) -> Option { + self.tx_range.as_ref().map(|t| t.start()) + } + + /// Returns the last transaction number of the segment. + pub fn tx_end(&self) -> Option { + self.tx_range.as_ref().map(|t| t.end()) + } + + /// Number of transactions. + pub fn tx_len(&self) -> Option { + self.tx_range.as_ref().map(|r| (r.end() + 1) - r.start()) + } + + /// Number of blocks. + pub fn block_len(&self) -> Option { + self.block_range.as_ref().map(|r| (r.end() + 1) - r.start()) + } + + /// Increments block end range depending on segment + pub fn increment_block(&mut self) -> BlockNumber { + if let Some(block_range) = &mut self.block_range { + block_range.end += 1; + block_range.end + } else { + self.block_range = Some(SegmentRangeInclusive::new( + self.expected_block_start(), + self.expected_block_start(), + )); + self.expected_block_start() + } + } + + /// Increments tx end range depending on segment + pub fn increment_tx(&mut self) { + match self.segment { + StaticFileSegment::Headers => (), + StaticFileSegment::Transactions | StaticFileSegment::Receipts => { + if let Some(tx_range) = &mut self.tx_range { + tx_range.end += 1; + } else { + self.tx_range = Some(SegmentRangeInclusive::new(0, 0)); + } + } + } + } + + /// Removes `num` elements from end of tx or block range. 
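A minimal sketch of how the optional ranges in `SegmentHeader` behave for a freshly created, still-empty segment (the expected range is illustrative): the first `increment_block` materializes the block range at the expected start, and `increment_tx` seeds the transaction range for transaction-based segments:

    let expected = SegmentRangeInclusive::new(0, 499_999);
    let mut header = SegmentHeader::new(expected, None, None, StaticFileSegment::Transactions);
    assert!(header.block_range().is_none()); // empty file: no data yet

    // Seeds the block range at the expected start, i.e. 0..=0.
    assert_eq!(header.increment_block(), 0);
    assert_eq!(header.block_len(), Some(1));

    // For Transactions/Receipts the tx range is seeded at 0..=0 as well.
    header.increment_tx();
    assert_eq!(header.tx_len(), Some(1));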
+ pub fn prune(&mut self, num: u64) { + match self.segment { + StaticFileSegment::Headers => { + if let Some(range) = &mut self.block_range { + if num > range.end { + self.block_range = None; + } else { + range.end = range.end.saturating_sub(num); + } + }; + } + StaticFileSegment::Transactions | StaticFileSegment::Receipts => { + if let Some(range) = &mut self.tx_range { + if num > range.end { + self.tx_range = None; + } else { + range.end = range.end.saturating_sub(num); + } + }; + } + }; + } + + /// Sets a new block_range. + pub fn set_block_range(&mut self, block_start: BlockNumber, block_end: BlockNumber) { + if let Some(block_range) = &mut self.block_range { + block_range.start = block_start; + block_range.end = block_end; + } else { + self.block_range = Some(SegmentRangeInclusive::new(block_start, block_end)) + } + } + + /// Sets a new tx_range. + pub fn set_tx_range(&mut self, tx_start: TxNumber, tx_end: TxNumber) { + if let Some(tx_range) = &mut self.tx_range { + tx_range.start = tx_start; + tx_range.end = tx_end; + } else { + self.tx_range = Some(SegmentRangeInclusive::new(tx_start, tx_end)) + } + } + + /// Returns the row offset which depends on whether the segment is block or transaction based. + pub fn start(&self) -> Option { + match self.segment { + StaticFileSegment::Headers => self.block_start(), + StaticFileSegment::Transactions | StaticFileSegment::Receipts => self.tx_start(), + } + } +} + +/// Configuration used on the segment. +#[derive(Debug, Clone, Copy)] +pub struct SegmentConfig { + /// Inclusion filters used on the segment + pub filters: Filters, + /// Compression used on the segment + pub compression: Compression, +} + +/// Helper type to handle segment transaction and block INCLUSIVE ranges. +/// +/// They can be modified on a hot loop, which makes the `std::ops::RangeInclusive` a poor fit. 
+#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash, Clone, Copy)] +pub struct SegmentRangeInclusive { + start: u64, + end: u64, +} + +impl SegmentRangeInclusive { + /// Creates a new [`SegmentRangeInclusive`] + pub fn new(start: u64, end: u64) -> Self { + Self { start, end } + } + + /// Start of the inclusive range + pub fn start(&self) -> u64 { + self.start + } + + /// End of the inclusive range + pub fn end(&self) -> u64 { + self.end + } +} + +impl std::fmt::Display for SegmentRangeInclusive { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}..={}", self.start, self.end) + } +} + +impl From> for SegmentRangeInclusive { + fn from(value: RangeInclusive) -> Self { + SegmentRangeInclusive { start: *value.start(), end: *value.end() } + } +} + +impl From<&SegmentRangeInclusive> for RangeInclusive { + fn from(value: &SegmentRangeInclusive) -> Self { + value.start()..=value.end() + } +} + +impl From for RangeInclusive { + fn from(value: SegmentRangeInclusive) -> Self { + (&value).into() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_filename() { + let test_vectors = [ + (StaticFileSegment::Headers, 2..=30, "static_file_headers_2_30", None), + (StaticFileSegment::Receipts, 30..=300, "static_file_receipts_30_300", None), + ( + StaticFileSegment::Transactions, + 1_123_233..=11_223_233, + "static_file_transactions_1123233_11223233", + None, + ), + ( + StaticFileSegment::Headers, + 2..=30, + "static_file_headers_2_30_cuckoo-fmph_lz4", + Some(( + Compression::Lz4, + Filters::WithFilters( + InclusionFilter::Cuckoo, + crate::static_file::PerfectHashingFunction::Fmph, + ), + )), + ), + ( + StaticFileSegment::Headers, + 2..=30, + "static_file_headers_2_30_cuckoo-fmph_zstd", + Some(( + Compression::Zstd, + Filters::WithFilters( + InclusionFilter::Cuckoo, + crate::static_file::PerfectHashingFunction::Fmph, + ), + )), + ), + ( + StaticFileSegment::Headers, + 2..=30, + "static_file_headers_2_30_cuckoo-fmph_zstd-dict", + Some(( + Compression::ZstdWithDictionary, + Filters::WithFilters( + InclusionFilter::Cuckoo, + crate::static_file::PerfectHashingFunction::Fmph, + ), + )), + ), + ]; + + for (segment, block_range, filename, configuration) in test_vectors { + let block_range: SegmentRangeInclusive = block_range.into(); + if let Some((compression, filters)) = configuration { + assert_eq!( + segment.filename_with_configuration(filters, compression, &block_range,), + filename + ); + } else { + assert_eq!(segment.filename(&block_range), filename); + } + + assert_eq!(StaticFileSegment::parse_filename(filename), Some((segment, block_range))); + } + + assert_eq!(StaticFileSegment::parse_filename("static_file_headers_2"), None); + assert_eq!(StaticFileSegment::parse_filename("static_file_headers_"), None); + } +} diff --git a/crates/primitives/src/storage.rs b/crates/primitives/src/storage.rs index b2f5ab1d9de9..2e03424d2b46 100644 --- a/crates/primitives/src/storage.rs +++ b/crates/primitives/src/storage.rs @@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize}; /// Account storage entry. /// -/// `key` is the subkey when used as a value in the `StorageChangeSet` table. +/// `key` is the subkey when used as a value in the `StorageChangeSets` table. 
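For completeness, a tiny illustration of the conversions provided by the `SegmentRangeInclusive` helper in the segment module above (values illustrative):

    let segment_range: SegmentRangeInclusive = (2..=30u64).into();
    assert_eq!(segment_range.to_string(), "2..=30"); // Display mirrors the std range syntax
    let std_range: std::ops::RangeInclusive<u64> = segment_range.into();
    assert_eq!(std_range, 2..=30);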
#[derive_arbitrary(compact)] #[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Ord)] pub struct StorageEntry { diff --git a/crates/primitives/src/transaction/access_list.rs b/crates/primitives/src/transaction/access_list.rs index ba74aaf1435d..a20ac67cf420 100644 --- a/crates/primitives/src/transaction/access_list.rs +++ b/crates/primitives/src/transaction/access_list.rs @@ -1,5 +1,4 @@ -use crate::{Address, B256}; -use alloy_primitives::U256; +use crate::{Address, B256, U256}; use alloy_rlp::{RlpDecodable, RlpDecodableWrapper, RlpEncodable, RlpEncodableWrapper}; use reth_codecs::{main_codec, Compact}; use std::{ diff --git a/crates/primitives/src/transaction/eip1559.rs b/crates/primitives/src/transaction/eip1559.rs index 5c299806b834..f78de0372fa2 100644 --- a/crates/primitives/src/transaction/eip1559.rs +++ b/crates/primitives/src/transaction/eip1559.rs @@ -1,5 +1,5 @@ use super::access_list::AccessList; -use crate::{keccak256, Bytes, ChainId, Signature, TransactionKind, TxType, TxValue, B256}; +use crate::{keccak256, Bytes, ChainId, Signature, TransactionKind, TxType, B256, U256}; use alloy_rlp::{length_of_length, Decodable, Encodable, Header}; use bytes::BytesMut; use reth_codecs::{main_codec, Compact}; @@ -46,7 +46,7 @@ pub struct TxEip1559 { /// be transferred to the message call’s recipient or, /// in the case of contract creation, as an endowment /// to the newly created account; formally Tv. - pub value: TxValue, + pub value: U256, /// The accessList specifies a list of addresses and storage keys; /// these addresses and storage keys are added into the `accessed_addresses` /// and `accessed_storage_keys` global sets (introduced in EIP-2929). @@ -188,7 +188,7 @@ impl TxEip1559 { mem::size_of::() + // max_fee_per_gas mem::size_of::() + // max_priority_fee_per_gas self.to.size() + // to - mem::size_of::() + // value + mem::size_of::() + // value self.access_list.size() + // access_list self.input.len() // input } @@ -244,7 +244,7 @@ mod tests { nonce: 0x42, gas_limit: 44386, to: TransactionKind::Call( hex!("6069a6c32cf691f5982febae4faf8a6f3ab2f0f6").into()), - value: 0_u64.into(), + value: U256::ZERO, input: hex!("a22cb4650000000000000000000000005eee75727d804a2b13038928d36f8b188945a57a0000000000000000000000000000000000000000000000000000000000000000").into(), max_fee_per_gas: 0x4a817c800, max_priority_fee_per_gas: 0x3b9aca00, diff --git a/crates/primitives/src/transaction/eip2930.rs b/crates/primitives/src/transaction/eip2930.rs index 036af743f212..b9c8328bb13b 100644 --- a/crates/primitives/src/transaction/eip2930.rs +++ b/crates/primitives/src/transaction/eip2930.rs @@ -1,5 +1,5 @@ use super::access_list::AccessList; -use crate::{keccak256, Bytes, ChainId, Signature, TransactionKind, TxType, TxValue, B256}; +use crate::{keccak256, Bytes, ChainId, Signature, TransactionKind, TxType, B256, U256}; use alloy_rlp::{length_of_length, Decodable, Encodable, Header}; use bytes::BytesMut; use reth_codecs::{main_codec, Compact}; @@ -34,7 +34,7 @@ pub struct TxEip2930 { /// be transferred to the message call’s recipient or, /// in the case of contract creation, as an endowment /// to the newly created account; formally Tv. - pub value: TxValue, + pub value: U256, /// The accessList specifies a list of addresses and storage keys; /// these addresses and storage keys are added into the `accessed_addresses` /// and `accessed_storage_keys` global sets (introduced in EIP-2929). 
@@ -58,7 +58,7 @@ impl TxEip2930 { mem::size_of::() + // gas_price mem::size_of::() + // gas_limit self.to.size() + // to - mem::size_of::() + // value + mem::size_of::() + // value self.access_list.size() + // access_list self.input.len() // input } @@ -204,7 +204,7 @@ mod tests { gas_price: 1, gas_limit: 2, to: TransactionKind::Create, - value: 3_u64.into(), + value: U256::from(3), input: Bytes::from(vec![1, 2]), access_list: Default::default(), }); @@ -227,7 +227,7 @@ mod tests { gas_price: 1, gas_limit: 2, to: TransactionKind::Call(Address::default()), - value: 3_u64.into(), + value: U256::from(3), input: Bytes::from(vec![1, 2]), access_list: Default::default(), }); diff --git a/crates/primitives/src/transaction/eip4844.rs b/crates/primitives/src/transaction/eip4844.rs index 11eb6e85684b..3cc6297da5ad 100644 --- a/crates/primitives/src/transaction/eip4844.rs +++ b/crates/primitives/src/transaction/eip4844.rs @@ -1,7 +1,7 @@ use super::access_list::AccessList; use crate::{ constants::eip4844::DATA_GAS_PER_BLOB, keccak256, Bytes, ChainId, Signature, TransactionKind, - TxType, TxValue, B256, + TxType, B256, U256, }; use alloy_rlp::{length_of_length, Decodable, Encodable, Header}; use bytes::BytesMut; @@ -60,7 +60,7 @@ pub struct TxEip4844 { /// be transferred to the message call’s recipient or, /// in the case of contract creation, as an endowment /// to the newly created account; formally Tv. - pub value: TxValue, + pub value: U256, /// The accessList specifies a list of addresses and storage keys; /// these addresses and storage keys are added into the `accessed_addresses` /// and `accessed_storage_keys` global sets (introduced in EIP-2929). @@ -244,7 +244,7 @@ impl TxEip4844 { mem::size_of::() + // max_fee_per_gas mem::size_of::() + // max_priority_fee_per_gas self.to.size() + // to - mem::size_of::() + // value + mem::size_of::() + // value self.access_list.size() + // access_list self.input.len() + // input self.blob_versioned_hashes.capacity() * mem::size_of::() + // blob hashes size diff --git a/crates/primitives/src/transaction/legacy.rs b/crates/primitives/src/transaction/legacy.rs index f717764dc2c8..eba89f93dcbe 100644 --- a/crates/primitives/src/transaction/legacy.rs +++ b/crates/primitives/src/transaction/legacy.rs @@ -1,4 +1,4 @@ -use crate::{keccak256, Bytes, ChainId, Signature, TransactionKind, TxType, TxValue, B256}; +use crate::{keccak256, Bytes, ChainId, Signature, TransactionKind, TxType, B256, U256}; use alloy_rlp::{length_of_length, Encodable, Header}; use bytes::BytesMut; use reth_codecs::{main_codec, Compact}; @@ -33,7 +33,7 @@ pub struct TxLegacy { /// be transferred to the message call’s recipient or, /// in the case of contract creation, as an endowment /// to the newly created account; formally Tv. - pub value: TxValue, + pub value: U256, /// Input has two uses depending if transaction is Create or Call (if `to` field is None or /// Some). 
pub init: An unlimited size byte array specifying the /// EVM-code for the account initialisation procedure CREATE, @@ -51,7 +51,7 @@ impl TxLegacy { mem::size_of::() + // gas_price mem::size_of::() + // gas_limit self.to.size() + // to - mem::size_of::() + // value + mem::size_of::() + // value self.input.len() // input } @@ -191,7 +191,7 @@ mod tests { gas_price: 0xfa56ea00, gas_limit: 119902, to: TransactionKind::Call( hex!("06012c8cf97bead5deae237070f9587f8e7a266d").into()), - value: 0x1c6bf526340000u64.into(), + value: U256::from(0x1c6bf526340000u64), input: hex!("f7d8c88300000000000000000000000000000000000000000000000000000000000cee6100000000000000000000000000000000000000000000000000000000000ac3e1").into(), }); diff --git a/crates/primitives/src/transaction/mod.rs b/crates/primitives/src/transaction/mod.rs index 7bd67a570c7f..565b88c55398 100644 --- a/crates/primitives/src/transaction/mod.rs +++ b/crates/primitives/src/transaction/mod.rs @@ -1,6 +1,6 @@ use crate::{ compression::{TRANSACTION_COMPRESSOR, TRANSACTION_DECOMPRESSOR}, - keccak256, Address, BlockHashOrNumber, Bytes, TxHash, B256, + keccak256, Address, BlockHashOrNumber, Bytes, TxHash, B256, U256, }; use alloy_rlp::{ Decodable, Encodable, Error as RlpError, Header, EMPTY_LIST_CODE, EMPTY_STRING_CODE, @@ -32,7 +32,6 @@ pub use signature::Signature; pub use tx_type::{ TxType, EIP1559_TX_TYPE_ID, EIP2930_TX_TYPE_ID, EIP4844_TX_TYPE_ID, LEGACY_TX_TYPE_ID, }; -pub use tx_value::TxValue; pub use variant::TransactionSignedVariant; mod access_list; @@ -48,7 +47,6 @@ mod pooled; mod sidecar; mod signature; mod tx_type; -mod tx_value; pub(crate) mod util; mod variant; @@ -192,7 +190,7 @@ impl Transaction { } /// Gets the transaction's value field. - pub fn value(&self) -> TxValue { + pub fn value(&self) -> U256 { *match self { Transaction::Legacy(TxLegacy { value, .. }) | Transaction::Eip2930(TxEip2930 { value, .. }) | @@ -481,7 +479,7 @@ impl Transaction { } /// This sets the transaction's value. 
- pub fn set_value(&mut self, value: TxValue) { + pub fn set_value(&mut self, value: U256) { match self { Transaction::Legacy(tx) => tx.value = value, Transaction::Eip2930(tx) => tx.value = value, @@ -1691,7 +1689,7 @@ mod tests { to: TransactionKind::Call( Address::from_str("d3e8763675e4c425df46cc3b5c0f6cbdac396046").unwrap(), ), - value: 1000000000000000_u64.into(), + value: U256::from(1000000000000000u64), input: Bytes::default(), }); let signature = Signature { @@ -1713,7 +1711,7 @@ mod tests { to: TransactionKind::Call(Address::from_slice( &hex!("d3e8763675e4c425df46cc3b5c0f6cbdac396046")[..], )), - value: 693361000000000_u64.into(), + value: U256::from(693361000000000u64), input: Default::default(), }); let signature = Signature { @@ -1734,7 +1732,7 @@ mod tests { to: TransactionKind::Call(Address::from_slice( &hex!("d3e8763675e4c425df46cc3b5c0f6cbdac396046")[..], )), - value: 1000000000000000_u64.into(), + value: U256::from(1000000000000000u64), input: Bytes::default(), }); let signature = Signature { @@ -1756,7 +1754,7 @@ mod tests { to: TransactionKind::Call(Address::from_slice( &hex!("61815774383099e24810ab832a5b2a5425c154d5")[..], )), - value: 3000000000000000000_u64.into(), + value: U256::from(3000000000000000000u64), input: Default::default(), access_list: Default::default(), }); @@ -1778,7 +1776,7 @@ mod tests { to: TransactionKind::Call(Address::from_slice( &hex!("cf7f9e66af820a19257a2108375b180b0ec49167")[..], )), - value: 1234_u64.into(), + value: U256::from(1234), input: Bytes::default(), }); let signature = Signature { diff --git a/crates/primitives/src/transaction/optimism.rs b/crates/primitives/src/transaction/optimism.rs index 400074bd0ad7..97d9edc5873a 100644 --- a/crates/primitives/src/transaction/optimism.rs +++ b/crates/primitives/src/transaction/optimism.rs @@ -1,4 +1,4 @@ -use crate::{Address, Bytes, TransactionKind, TxType, TxValue, B256}; +use crate::{Address, Bytes, TransactionKind, TxType, B256, U256}; use alloy_rlp::{ length_of_length, Decodable, Encodable, Error as DecodeError, Header, EMPTY_STRING_CODE, }; @@ -20,7 +20,7 @@ pub struct TxDeposit { /// The ETH value to mint on L2. pub mint: Option, /// The ETH value to send to the recipient account. - pub value: TxValue, + pub value: U256, /// The gas limit for the L2 transaction. pub gas_limit: u64, /// Field indicating if this transaction is exempt from the L2 gas limit. @@ -38,7 +38,7 @@ impl TxDeposit { mem::size_of::
() + // from self.to.size() + // to mem::size_of::>() + // mint - mem::size_of::() + // value + mem::size_of::() + // value mem::size_of::() + // gas_limit mem::size_of::() + // is_system_transaction self.input.len() // input @@ -171,7 +171,7 @@ mod tests { from: Address::default(), to: TransactionKind::default(), mint: Some(100), - value: TxValue::default(), + value: U256::default(), gas_limit: 50000, is_system_transaction: true, input: Bytes::default(), @@ -191,7 +191,7 @@ mod tests { from: Address::default(), to: TransactionKind::default(), mint: Some(100), - value: TxValue::default(), + value: U256::default(), gas_limit: 50000, is_system_transaction: true, input: Bytes::default(), @@ -213,7 +213,7 @@ mod tests { from: Address::default(), to: TransactionKind::default(), mint: Some(100), - value: TxValue::default(), + value: U256::default(), gas_limit: 50000, is_system_transaction: true, input: Bytes::default(), diff --git a/crates/primitives/src/transaction/tx_value.rs b/crates/primitives/src/transaction/tx_value.rs deleted file mode 100644 index da1c133d0c0e..000000000000 --- a/crates/primitives/src/transaction/tx_value.rs +++ /dev/null @@ -1,128 +0,0 @@ -use crate::{ruint::UintTryFrom, U256}; -use alloy_rlp::{RlpDecodableWrapper, RlpEncodableWrapper}; -use reth_codecs::{add_arbitrary_tests, Compact}; -use serde::{Deserialize, Serialize}; - -/// TxValue is the type of the `value` field in the various Ethereum transactions structs. -/// -/// While the field is 256 bits, for many chains it's not possible for the field to use -/// this full precision, hence we use a wrapper type to allow for overriding of encoding. -#[add_arbitrary_tests(compact, rlp)] -#[derive( - Default, - Debug, - Copy, - Clone, - Hash, - PartialEq, - Eq, - Serialize, - Deserialize, - RlpEncodableWrapper, - RlpDecodableWrapper, -)] -pub struct TxValue(U256); - -impl From for U256 { - #[inline] - fn from(value: TxValue) -> U256 { - value.0 - } -} - -impl From for TxValue -where - U256: UintTryFrom, -{ - #[inline] - #[track_caller] - fn from(value: T) -> Self { - Self(U256::uint_try_from(value).unwrap()) - } -} - -/// As ethereum circulation on mainnet is around 120mil eth as of 2022 that is around -/// 120000000000000000000000000 wei we are safe to use u128 for TxValue's encoding -/// as its max number is 340282366920938463463374607431768211455. -/// This optimization should be disabled for chains such as Optimism, where -/// some tx values may require more than 128-bit precision. 
-impl Compact for TxValue { - #[inline] - fn to_compact(self, buf: &mut B) -> usize - where - B: bytes::BufMut + AsMut<[u8]>, - { - #[cfg(feature = "optimism")] - { - self.0.to_compact(buf) - } - #[cfg(not(feature = "optimism"))] - { - self.0.to::().to_compact(buf) - } - } - - #[inline] - fn from_compact(buf: &[u8], identifier: usize) -> (Self, &[u8]) { - #[cfg(feature = "optimism")] - { - let (i, buf) = U256::from_compact(buf, identifier); - (TxValue(i), buf) - } - #[cfg(not(feature = "optimism"))] - { - let (i, buf) = u128::from_compact(buf, identifier); - (TxValue::from(i), buf) - } - } -} - -impl std::fmt::Display for TxValue { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.0.fmt(f) - } -} - -impl PartialEq for TxValue { - fn eq(&self, other: &U256) -> bool { - self.0.eq(other) - } -} - -#[cfg(any(test, feature = "arbitrary"))] -impl<'a> arbitrary::Arbitrary<'a> for TxValue { - #[inline] - fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { - #[cfg(feature = "optimism")] - { - U256::arbitrary(u).map(Self) - } - #[cfg(not(feature = "optimism"))] - { - u128::arbitrary(u).map(Self::from) - } - } -} - -#[cfg(any(test, feature = "arbitrary"))] -impl proptest::arbitrary::Arbitrary for TxValue { - type Parameters = ::Parameters; - #[inline] - fn arbitrary_with((): Self::Parameters) -> Self::Strategy { - use proptest::strategy::Strategy; - - #[cfg(feature = "optimism")] - { - proptest::prelude::any::().prop_map(Self) - } - #[cfg(not(feature = "optimism"))] - { - proptest::prelude::any::().prop_map(Self::from) - } - } - #[cfg(feature = "optimism")] - type Strategy = proptest::arbitrary::Mapped; - - #[cfg(not(feature = "optimism"))] - type Strategy = proptest::arbitrary::Mapped; -} diff --git a/crates/prune/Cargo.toml b/crates/prune/Cargo.toml index 750cd84d6bf0..6699c567bc29 100644 --- a/crates/prune/Cargo.toml +++ b/crates/prune/Cargo.toml @@ -17,7 +17,7 @@ reth-primitives.workspace = true reth-db.workspace = true reth-provider.workspace = true reth-interfaces.workspace = true -reth-snapshot.workspace = true +reth-static-file.workspace = true reth-tokio-util.workspace = true reth-config.workspace = true diff --git a/crates/prune/src/builder.rs b/crates/prune/src/builder.rs index 7ccf665d98a9..5836688bf0fa 100644 --- a/crates/prune/src/builder.rs +++ b/crates/prune/src/builder.rs @@ -3,7 +3,6 @@ use reth_config::PruneConfig; use reth_db::database::Database; use reth_primitives::{PruneModes, MAINNET}; use reth_provider::ProviderFactory; -use reth_snapshot::HighestSnapshotsTracker; /// Contains the information required to build a pruner #[derive(Debug, Clone, PartialEq, Eq)] @@ -53,11 +52,7 @@ impl PrunerBuilder { } /// Builds a [Pruner] from the current configuration. - pub fn build( - self, - provider_factory: ProviderFactory, - highest_snapshots_rx: HighestSnapshotsTracker, - ) -> Pruner { + pub fn build(self, provider_factory: ProviderFactory) -> Pruner { let segments = SegmentSet::::from_prune_modes(self.segments); Pruner::new( @@ -66,7 +61,6 @@ impl PrunerBuilder { self.block_interval, self.prune_delete_limit, self.max_reorg_depth, - highest_snapshots_rx, ) } } diff --git a/crates/prune/src/event.rs b/crates/prune/src/event.rs index 7599b809b608..c2c3808d1374 100644 --- a/crates/prune/src/event.rs +++ b/crates/prune/src/event.rs @@ -4,6 +4,8 @@ use std::{collections::BTreeMap, time::Duration}; /// An event emitted by a [Pruner][crate::Pruner]. 
#[derive(Debug, PartialEq, Eq, Clone)] pub enum PrunerEvent { + /// Emitted when pruner started running. + Started { tip_block_number: BlockNumber }, /// Emitted when pruner finished running. Finished { tip_block_number: BlockNumber, diff --git a/crates/prune/src/pruner.rs b/crates/prune/src/pruner.rs index 649c3b0925ca..459383204f19 100644 --- a/crates/prune/src/pruner.rs +++ b/crates/prune/src/pruner.rs @@ -6,13 +6,14 @@ use crate::{ Metrics, PrunerError, PrunerEvent, }; use reth_db::database::Database; -use reth_primitives::{BlockNumber, PruneMode, PruneProgress, PruneSegment}; -use reth_provider::{ProviderFactory, PruneCheckpointReader}; -use reth_snapshot::HighestSnapshotsTracker; +use reth_primitives::{ + BlockNumber, PruneMode, PruneProgress, PrunePurpose, PruneSegment, StaticFileSegment, +}; +use reth_provider::{DatabaseProviderRW, ProviderFactory, PruneCheckpointReader}; use reth_tokio_util::EventListeners; -use std::{collections::BTreeMap, sync::Arc, time::Instant}; +use std::{collections::BTreeMap, time::Instant}; use tokio_stream::wrappers::UnboundedReceiverStream; -use tracing::{debug, trace}; +use tracing::debug; /// Result of [Pruner::run] execution. pub type PrunerResult = Result; @@ -20,11 +21,13 @@ pub type PrunerResult = Result; /// The pruner type itself with the result of [Pruner::run] pub type PrunerWithResult = (Pruner, PrunerResult); +type PrunerStats = BTreeMap; + /// Pruning routine. Main pruning logic happens in [Pruner::run]. #[derive(Debug)] pub struct Pruner { provider_factory: ProviderFactory, - segments: Vec>>, + segments: Vec>>, /// Minimum pruning interval measured in blocks. All prune segments are checked and, if needed, /// pruned, when the chain advances by the specified number of blocks. min_block_interval: usize, @@ -37,8 +40,6 @@ pub struct Pruner { /// Maximum number of blocks to be pruned per run, as an additional restriction to /// `previous_tip_block_number`. prune_max_blocks_per_run: usize, - #[allow(dead_code)] - highest_snapshots_tracker: HighestSnapshotsTracker, metrics: Metrics, listeners: EventListeners, } @@ -47,11 +48,10 @@ impl Pruner { /// Creates a new [Pruner]. pub fn new( provider_factory: ProviderFactory, - segments: Vec>>, + segments: Vec>>, min_block_interval: usize, delete_limit: usize, prune_max_blocks_per_run: usize, - highest_snapshots_tracker: HighestSnapshotsTracker, ) -> Self { Self { provider_factory, @@ -60,13 +60,12 @@ impl Pruner { previous_tip_block_number: None, delete_limit, prune_max_blocks_per_run, - highest_snapshots_tracker, metrics: Metrics::default(), listeners: Default::default(), } } - /// Listen for events on the prune. + /// Listen for events on the pruner. 
pub fn events(&mut self) -> UnboundedReceiverStream { self.listeners.new_listener() } @@ -76,20 +75,14 @@ impl Pruner { if tip_block_number == 0 { self.previous_tip_block_number = Some(tip_block_number); - trace!(target: "pruner", %tip_block_number, "Nothing to prune yet"); + debug!(target: "pruner", %tip_block_number, "Nothing to prune yet"); return Ok(PruneProgress::Finished) } - trace!(target: "pruner", %tip_block_number, "Pruner started"); - let start = Instant::now(); - - let provider = self.provider_factory.provider_rw()?; + self.listeners.notify(PrunerEvent::Started { tip_block_number }); - let mut done = true; - let mut stats = BTreeMap::new(); - - // TODO(alexey): prune snapshotted segments of data (headers, transactions) - let highest_snapshots = *self.highest_snapshots_tracker.borrow(); + debug!(target: "pruner", %tip_block_number, "Pruner started"); + let start = Instant::now(); // Multiply `self.delete_limit` (number of rows to delete per block) by number of blocks // since last pruner run. `self.previous_tip_block_number` is close to @@ -106,34 +99,80 @@ impl Pruner { tip_block_number.saturating_sub(previous_tip_block_number) as usize })) .min(self.prune_max_blocks_per_run); - let mut delete_limit = self.delete_limit * blocks_since_last_run; + let delete_limit = self.delete_limit * blocks_since_last_run; + + let provider = self.provider_factory.provider_rw()?; + let (stats, delete_limit, progress) = + self.prune_segments(&provider, tip_block_number, delete_limit)?; + provider.commit()?; + + self.previous_tip_block_number = Some(tip_block_number); + + let elapsed = start.elapsed(); + self.metrics.duration_seconds.record(elapsed); + + debug!( + target: "pruner", + %tip_block_number, + ?elapsed, + %delete_limit, + ?progress, + ?stats, + "Pruner finished" + ); - for segment in &self.segments { + self.listeners.notify(PrunerEvent::Finished { tip_block_number, elapsed, stats }); + + Ok(progress) + } + + /// Prunes the segments that the [Pruner] was initialized with, and the segments that needs to + /// be pruned according to the highest static_files. + /// + /// Returns [PrunerStats], `delete_limit` that remained after pruning all segments, and + /// [PruneProgress]. + fn prune_segments( + &mut self, + provider: &DatabaseProviderRW, + tip_block_number: BlockNumber, + mut delete_limit: usize, + ) -> Result<(PrunerStats, usize, PruneProgress), PrunerError> { + let static_file_segments = self.static_file_segments(); + let segments = static_file_segments + .iter() + .map(|segment| (segment, PrunePurpose::StaticFile)) + .chain(self.segments.iter().map(|segment| (segment, PrunePurpose::User))); + + let mut done = true; + let mut stats = PrunerStats::new(); + + for (segment, purpose) in segments { if delete_limit == 0 { break } if let Some((to_block, prune_mode)) = segment .mode() - .map(|mode| mode.prune_target_block(tip_block_number, segment.segment())) + .map(|mode| mode.prune_target_block(tip_block_number, segment.segment(), purpose)) .transpose()? 
.flatten() { - trace!( + debug!( target: "pruner", segment = ?segment.segment(), + ?purpose, %to_block, ?prune_mode, - "Got target block to prune" + "Segment pruning started" ); let segment_start = Instant::now(); let previous_checkpoint = provider.get_prune_checkpoint(segment.segment())?; let output = segment - .prune(&provider, PruneInput { previous_checkpoint, to_block, delete_limit })?; + .prune(provider, PruneInput { previous_checkpoint, to_block, delete_limit })?; if let Some(checkpoint) = output.checkpoint { segment - .save_checkpoint(&provider, checkpoint.as_prune_checkpoint(prune_mode))?; + .save_checkpoint(provider, checkpoint.as_prune_checkpoint(prune_mode))?; } self.metrics .get_prune_segment_metrics(segment.segment()) @@ -147,112 +186,70 @@ impl Pruner { done = done && output.done; delete_limit = delete_limit.saturating_sub(output.pruned); - stats.insert( - segment.segment(), - (PruneProgress::from_done(output.done), output.pruned), - ); - } else { - trace!(target: "pruner", segment = ?segment.segment(), "No target block to prune"); - } - } - if let Some(snapshots) = highest_snapshots { - if let (Some(to_block), true) = (snapshots.headers, delete_limit > 0) { - let prune_mode = PruneMode::Before(to_block + 1); - trace!( + debug!( target: "pruner", - prune_segment = ?PruneSegment::Headers, + segment = ?segment.segment(), + ?purpose, %to_block, ?prune_mode, - "Got target block to prune" + %output.pruned, + "Segment pruning finished" ); - let segment_start = Instant::now(); - let segment = segments::Headers::new(prune_mode); - let previous_checkpoint = provider.get_prune_checkpoint(PruneSegment::Headers)?; - let output = segment - .prune(&provider, PruneInput { previous_checkpoint, to_block, delete_limit })?; - if let Some(checkpoint) = output.checkpoint { - segment - .save_checkpoint(&provider, checkpoint.as_prune_checkpoint(prune_mode))?; + if output.pruned > 0 { + stats.insert( + segment.segment(), + (PruneProgress::from_done(output.done), output.pruned), + ); } - self.metrics - .get_prune_segment_metrics(PruneSegment::Headers) - .duration_seconds - .record(segment_start.elapsed()); - - done = done && output.done; - delete_limit = delete_limit.saturating_sub(output.pruned); - stats.insert( - PruneSegment::Headers, - (PruneProgress::from_done(output.done), output.pruned), - ); + } else { + debug!(target: "pruner", segment = ?segment.segment(), ?purpose, "Nothing to prune for the segment"); } + } - if let (Some(to_block), true) = (snapshots.transactions, delete_limit > 0) { - let prune_mode = PruneMode::Before(to_block + 1); - trace!( - target: "pruner", - prune_segment = ?PruneSegment::Transactions, - %to_block, - ?prune_mode, - "Got target block to prune" - ); + Ok((stats, delete_limit, PruneProgress::from_done(done))) + } - let segment_start = Instant::now(); - let segment = segments::Transactions::new(prune_mode); - let previous_checkpoint = provider.get_prune_checkpoint(PruneSegment::Headers)?; - let output = segment - .prune(&provider, PruneInput { previous_checkpoint, to_block, delete_limit })?; - if let Some(checkpoint) = output.checkpoint { - segment - .save_checkpoint(&provider, checkpoint.as_prune_checkpoint(prune_mode))?; - } - self.metrics - .get_prune_segment_metrics(PruneSegment::Transactions) - .duration_seconds - .record(segment_start.elapsed()); + /// Returns pre-configured segments that needs to be pruned according to the highest + /// static_files for [PruneSegment::Transactions], [PruneSegment::Headers] and + /// [PruneSegment::Receipts]. 
+ fn static_file_segments(&self) -> Vec>> { + let mut segments = Vec::>>::new(); - done = done && output.done; - delete_limit = delete_limit.saturating_sub(output.pruned); - stats.insert( - PruneSegment::Transactions, - (PruneProgress::from_done(output.done), output.pruned), - ); - } - } - - provider.commit()?; - self.previous_tip_block_number = Some(tip_block_number); + let static_file_provider = self.provider_factory.static_file_provider(); - let elapsed = start.elapsed(); - self.metrics.duration_seconds.record(elapsed); + if let Some(to_block) = + static_file_provider.get_highest_static_file_block(StaticFileSegment::Transactions) + { + segments + .push(Box::new(segments::Transactions::new(PruneMode::before_inclusive(to_block)))) + } - trace!( - target: "pruner", - %tip_block_number, - ?elapsed, - %delete_limit, - %done, - ?stats, - "Pruner finished" - ); + if let Some(to_block) = + static_file_provider.get_highest_static_file_block(StaticFileSegment::Headers) + { + segments.push(Box::new(segments::Headers::new(PruneMode::before_inclusive(to_block)))) + } - self.listeners.notify(PrunerEvent::Finished { tip_block_number, elapsed, stats }); + if let Some(to_block) = + static_file_provider.get_highest_static_file_block(StaticFileSegment::Receipts) + { + segments.push(Box::new(segments::Receipts::new(PruneMode::before_inclusive(to_block)))) + } - Ok(PruneProgress::from_done(done)) + segments } /// Returns `true` if the pruning is needed at the provided tip block number. /// This determined by the check against minimum pruning interval and last pruned block number. pub fn is_pruning_needed(&self, tip_block_number: BlockNumber) -> bool { - if self.previous_tip_block_number.map_or(true, |previous_tip_block_number| { - // Saturating subtraction is needed for the case when the chain was reverted, meaning - // current block number might be less than the previous tip block number. - // If that's the case, no pruning is needed as outdated data is also reverted. - tip_block_number.saturating_sub(previous_tip_block_number) >= - self.min_block_interval as u64 - }) { + // Saturating subtraction is needed for the case when the chain was reverted, meaning + // current block number might be less than the previous tip block number. + // If that's the case, no pruning is needed as outdated data is also reverted. 
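One clarifying note on the helper used in `static_file_segments` above: the removed code built the same target as `PruneMode::Before(to_block + 1)`, so `PruneMode::before_inclusive(to_block)` is best read as "prune database rows up to and including `to_block`" for data that already lives in static files. A minimal, purely illustrative sketch under that assumption:

    // Assume the transactions static file already holds blocks up to 500_000.
    let to_block = 500_000u64;
    // The pruner then schedules the corresponding database tables for pruning
    // up to and including that block, mirroring the old `Before(to_block + 1)`.
    let _segment = segments::Transactions::new(PruneMode::before_inclusive(to_block));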
+ if tip_block_number.saturating_sub(self.previous_tip_block_number.unwrap_or_default()) >= + self.min_block_interval as u64 + { debug!( target: "pruner", previous_tip_block_number = ?self.previous_tip_block_number, @@ -269,20 +266,21 @@ impl Pruner { #[cfg(test)] mod tests { use crate::Pruner; - use reth_db::test_utils::create_test_rw_db; + use reth_db::test_utils::{create_test_rw_db, create_test_static_files_dir}; use reth_primitives::MAINNET; use reth_provider::ProviderFactory; - use tokio::sync::watch; #[test] fn is_pruning_needed() { let db = create_test_rw_db(); - let provider_factory = ProviderFactory::new(db, MAINNET.clone()); - let mut pruner = Pruner::new(provider_factory, vec![], 5, 0, 5, watch::channel(None).1); + let provider_factory = + ProviderFactory::new(db, MAINNET.clone(), create_test_static_files_dir()) + .expect("create provide factory with static_files"); + let mut pruner = Pruner::new(provider_factory, vec![], 5, 0, 5); // No last pruned block number was set before let first_block_number = 1; - assert!(pruner.is_pruning_needed(first_block_number)); + assert!(!pruner.is_pruning_needed(first_block_number)); pruner.previous_tip_block_number = Some(first_block_number); // Tip block number delta is >= than min block interval diff --git a/crates/prune/src/segments/account_history.rs b/crates/prune/src/segments/account_history.rs index bfebad1a95c0..a18897640baa 100644 --- a/crates/prune/src/segments/account_history.rs +++ b/crates/prune/src/segments/account_history.rs @@ -46,7 +46,7 @@ impl Segment for AccountHistory { let mut last_changeset_pruned_block = None; let (pruned_changesets, done) = provider - .prune_table_with_range::( + .prune_table_with_range::( range, input.delete_limit / 2, |_| false, @@ -60,7 +60,7 @@ impl Segment for AccountHistory { .map(|block_number| if done { block_number } else { block_number.saturating_sub(1) }) .unwrap_or(range_end); - let (processed, pruned_indices) = prune_history_indices::( + let (processed, pruned_indices) = prune_history_indices::( provider, last_changeset_pruned_block, |a, b| a.key == b.key, @@ -86,11 +86,11 @@ mod tests { use reth_db::{tables, BlockNumberList}; use reth_interfaces::test_utils::{ generators, - generators::{random_block_range, random_changeset_range, random_eoa_account_range}, + generators::{random_block_range, random_changeset_range, random_eoa_accounts}, }; use reth_primitives::{BlockNumber, PruneCheckpoint, PruneMode, PruneSegment, B256}; use reth_provider::PruneCheckpointReader; - use reth_stages::test_utils::TestStageDB; + use reth_stages::test_utils::{StorageKind, TestStageDB}; use std::{collections::BTreeMap, ops::AddAssign}; #[test] @@ -99,10 +99,9 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, 1..=5000, B256::ZERO, 0..1); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Database(None)).expect("insert blocks"); - let accounts = - random_eoa_account_range(&mut rng, 0..2).into_iter().collect::>(); + let accounts = random_eoa_accounts(&mut rng, 2).into_iter().collect::>(); let (changesets, _) = random_changeset_range( &mut rng, @@ -114,7 +113,7 @@ mod tests { db.insert_changesets(changesets.clone(), None).expect("insert changesets"); db.insert_history(changesets.clone(), None).expect("insert history"); - let account_occurrences = db.table::().unwrap().into_iter().fold( + let account_occurrences = db.table::().unwrap().into_iter().fold( BTreeMap::<_, usize>::new(), |mut map, (key, _)| { 
map.entry(key.key).or_default().add_assign(1); @@ -124,11 +123,11 @@ mod tests { assert!(account_occurrences.into_iter().any(|(_, occurrences)| occurrences > 1)); assert_eq!( - db.table::().unwrap().len(), + db.table::().unwrap().len(), changesets.iter().flatten().count() ); - let original_shards = db.table::().unwrap(); + let original_shards = db.table::().unwrap(); let test_prune = |to_block: BlockNumber, run: usize, expected_result: (bool, usize)| { let prune_mode = PruneMode::Before(to_block); @@ -202,19 +201,19 @@ mod tests { ); assert_eq!( - db.table::().unwrap().len(), + db.table::().unwrap().len(), pruned_changesets.values().flatten().count() ); - let actual_shards = db.table::().unwrap(); + let actual_shards = db.table::().unwrap(); let expected_shards = original_shards .iter() .filter(|(key, _)| key.highest_block_number > last_pruned_block_number) .map(|(key, blocks)| { let new_blocks = blocks - .iter(0) - .skip_while(|block| *block <= last_pruned_block_number as usize) + .iter() + .skip_while(|block| *block <= last_pruned_block_number) .collect::>(); (key.clone(), BlockNumberList::new_pre_sorted(new_blocks)) }) diff --git a/crates/prune/src/segments/headers.rs b/crates/prune/src/segments/headers.rs index f0e1754422d1..2da191375899 100644 --- a/crates/prune/src/segments/headers.rs +++ b/crates/prune/src/segments/headers.rs @@ -52,13 +52,13 @@ impl Segment for Headers { } let results = [ - self.prune_table::( + self.prune_table::(provider, block_range.clone(), delete_limit)?, + self.prune_table::( provider, block_range.clone(), delete_limit, )?, - self.prune_table::(provider, block_range.clone(), delete_limit)?, - self.prune_table::(provider, block_range, delete_limit)?, + self.prune_table::(provider, block_range, delete_limit)?, ]; if !results.iter().map(|(_, _, last_pruned_block)| last_pruned_block).all_equal() { @@ -112,9 +112,9 @@ impl Headers { mod tests { use crate::segments::{Headers, PruneInput, PruneOutput, Segment}; use assert_matches::assert_matches; - use reth_db::tables; + use reth_db::{tables, transaction::DbTx}; use reth_interfaces::test_utils::{generators, generators::random_header_range}; - use reth_primitives::{BlockNumber, PruneCheckpoint, PruneMode, PruneSegment, B256}; + use reth_primitives::{BlockNumber, PruneCheckpoint, PruneMode, PruneSegment, B256, U256}; use reth_provider::PruneCheckpointReader; use reth_stages::test_utils::TestStageDB; @@ -124,11 +124,15 @@ mod tests { let mut rng = generators::rng(); let headers = random_header_range(&mut rng, 0..100, B256::ZERO); - db.insert_headers_with_td(headers.iter()).expect("insert headers"); + let tx = db.factory.provider_rw().unwrap().into_tx(); + for header in headers.iter() { + TestStageDB::insert_header(None, &tx, header, U256::ZERO).unwrap(); + } + tx.commit().unwrap(); assert_eq!(db.table::().unwrap().len(), headers.len()); assert_eq!(db.table::().unwrap().len(), headers.len()); - assert_eq!(db.table::().unwrap().len(), headers.len()); + assert_eq!(db.table::().unwrap().len(), headers.len()); let test_prune = |to_block: BlockNumber, expected_result: (bool, usize)| { let prune_mode = PruneMode::Before(to_block); @@ -181,7 +185,7 @@ mod tests { headers.len() - (last_pruned_block_number + 1) as usize ); assert_eq!( - db.table::().unwrap().len(), + db.table::().unwrap().len(), headers.len() - (last_pruned_block_number + 1) as usize ); assert_eq!( diff --git a/crates/prune/src/segments/history.rs b/crates/prune/src/segments/history.rs index 4836eeb84154..0f712e79e12b 100644 --- 
a/crates/prune/src/segments/history.rs +++ b/crates/prune/src/segments/history.rs @@ -54,11 +54,11 @@ where // contain the target block number, as it's in this shard. else { let new_blocks = - blocks.iter(0).skip_while(|block| *block <= to_block as usize).collect::>(); + blocks.iter().skip_while(|block| *block <= to_block).collect::>(); // If there were blocks less than or equal to the target one // (so the shard has changed), update the shard. - if blocks.len() != new_blocks.len() { + if blocks.len() as usize != new_blocks.len() { // If there are no more blocks in this shard, we need to remove it, as empty // shards are not allowed. if new_blocks.is_empty() { diff --git a/crates/prune/src/segments/mod.rs b/crates/prune/src/segments/mod.rs index 339c4e013745..5e644e227e12 100644 --- a/crates/prune/src/segments/mod.rs +++ b/crates/prune/src/segments/mod.rs @@ -95,10 +95,18 @@ impl PruneInput { .unwrap_or(0); let to_tx_number = match provider.block_body_indices(self.to_block)? { - Some(body) => body, + Some(body) => { + let last_tx = body.last_tx_num(); + if last_tx + body.tx_count() == 0 { + // Prevents a scenario where the pruner correctly starts at a finalized block, + // but the first transaction (tx_num = 0) only appears on an unfinalized one. + // Should only happen on a test/hive scenario. + return Ok(None) + } + last_tx + } None => return Ok(None), - } - .last_tx_num(); + }; let range = from_tx_number..=to_tx_number; if range.is_empty() { diff --git a/crates/prune/src/segments/receipts.rs b/crates/prune/src/segments/receipts.rs index fdd4d0402e40..d1ce5324e6af 100644 --- a/crates/prune/src/segments/receipts.rs +++ b/crates/prune/src/segments/receipts.rs @@ -99,7 +99,7 @@ mod tests { }; use reth_primitives::{BlockNumber, PruneCheckpoint, PruneMode, PruneSegment, TxNumber, B256}; use reth_provider::PruneCheckpointReader; - use reth_stages::test_utils::TestStageDB; + use reth_stages::test_utils::{StorageKind, TestStageDB}; use std::ops::Sub; #[test] @@ -108,7 +108,7 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, 1..=10, B256::ZERO, 2..3); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Database(None)).expect("insert blocks"); let mut receipts = Vec::new(); for block in &blocks { diff --git a/crates/prune/src/segments/receipts_by_logs.rs b/crates/prune/src/segments/receipts_by_logs.rs index 984072b30763..efcbfe761105 100644 --- a/crates/prune/src/segments/receipts_by_logs.rs +++ b/crates/prune/src/segments/receipts_by_logs.rs @@ -4,7 +4,8 @@ use crate::{ }; use reth_db::{database::Database, tables}; use reth_primitives::{ - PruneCheckpoint, PruneMode, PruneSegment, ReceiptsLogPruneConfig, MINIMUM_PRUNING_DISTANCE, + PruneCheckpoint, PruneMode, PrunePurpose, PruneSegment, ReceiptsLogPruneConfig, + MINIMUM_PRUNING_DISTANCE, }; use reth_provider::{BlockReader, DatabaseProviderRW, PruneCheckpointWriter, TransactionsProvider}; use tracing::{instrument, trace}; @@ -39,7 +40,7 @@ impl Segment for ReceiptsByLogs { // for the other receipts it's as if they had a `PruneMode::Distance()` of // `MINIMUM_PRUNING_DISTANCE`. let to_block = PruneMode::Distance(MINIMUM_PRUNING_DISTANCE) - .prune_target_block(input.to_block, PruneSegment::ContractLogs)? + .prune_target_block(input.to_block, PruneSegment::ContractLogs, PrunePurpose::User)? 
.map(|(bn, _)| bn) .unwrap_or_default(); @@ -217,7 +218,7 @@ mod tests { }; use reth_primitives::{PruneMode, PruneSegment, ReceiptsLogPruneConfig, B256}; use reth_provider::{PruneCheckpointReader, TransactionsProvider}; - use reth_stages::test_utils::TestStageDB; + use reth_stages::test_utils::{StorageKind, TestStageDB}; use std::collections::BTreeMap; #[test] @@ -232,7 +233,7 @@ mod tests { random_block_range(&mut rng, (tip - 100 + 1)..=tip, B256::ZERO, 1..5), ] .concat(); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Database(None)).expect("insert blocks"); let mut receipts = Vec::new(); diff --git a/crates/prune/src/segments/sender_recovery.rs b/crates/prune/src/segments/sender_recovery.rs index ec2d189f55cc..0684fbd37bd4 100644 --- a/crates/prune/src/segments/sender_recovery.rs +++ b/crates/prune/src/segments/sender_recovery.rs @@ -43,7 +43,7 @@ impl Segment for SenderRecovery { let tx_range_end = *tx_range.end(); let mut last_pruned_transaction = tx_range_end; - let (pruned, done) = provider.prune_table_with_range::( + let (pruned, done) = provider.prune_table_with_range::( tx_range, input.delete_limit, |_| false, @@ -81,7 +81,7 @@ mod tests { use reth_interfaces::test_utils::{generators, generators::random_block_range}; use reth_primitives::{BlockNumber, PruneCheckpoint, PruneMode, PruneSegment, TxNumber, B256}; use reth_provider::PruneCheckpointReader; - use reth_stages::test_utils::TestStageDB; + use reth_stages::test_utils::{StorageKind, TestStageDB}; use std::ops::Sub; #[test] @@ -90,7 +90,7 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, 1..=10, B256::ZERO, 2..3); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Database(None)).expect("insert blocks"); let mut transaction_senders = Vec::new(); for block in &blocks { @@ -110,7 +110,7 @@ mod tests { ); assert_eq!( db.table::().unwrap().len(), - db.table::().unwrap().len() + db.table::().unwrap().len() ); let test_prune = |to_block: BlockNumber, expected_result: (bool, usize)| { @@ -178,7 +178,7 @@ mod tests { last_pruned_block_number.checked_sub(if result.done { 0 } else { 1 }); assert_eq!( - db.table::().unwrap().len(), + db.table::().unwrap().len(), transaction_senders.len() - (last_pruned_tx_number + 1) ); assert_eq!( diff --git a/crates/prune/src/segments/set.rs b/crates/prune/src/segments/set.rs index 5fdfdf58077b..7978bd4e583e 100644 --- a/crates/prune/src/segments/set.rs +++ b/crates/prune/src/segments/set.rs @@ -4,12 +4,11 @@ use crate::segments::{ }; use reth_db::database::Database; use reth_primitives::PruneModes; -use std::sync::Arc; /// Collection of [Segment]. Thread-safe, allocated on the heap. #[derive(Debug)] pub struct SegmentSet { - inner: Vec>>, + inner: Vec>>, } impl SegmentSet { @@ -20,7 +19,7 @@ impl SegmentSet { /// Adds new [Segment] to collection. pub fn segment + 'static>(mut self, segment: S) -> Self { - self.inner.push(Arc::new(segment)); + self.inner.push(Box::new(segment)); self } @@ -33,7 +32,7 @@ impl SegmentSet { } /// Consumes [SegmentSet] and returns a [Vec]. 
- pub fn into_vec(self) -> Vec>> { + pub fn into_vec(self) -> Vec>> { self.inner } diff --git a/crates/prune/src/segments/storage_history.rs b/crates/prune/src/segments/storage_history.rs index 45713760c7da..eba8d1724242 100644 --- a/crates/prune/src/segments/storage_history.rs +++ b/crates/prune/src/segments/storage_history.rs @@ -50,7 +50,7 @@ impl Segment for StorageHistory { let mut last_changeset_pruned_block = None; let (pruned_changesets, done) = provider - .prune_table_with_range::( + .prune_table_with_range::( BlockNumberAddress::range(range), input.delete_limit / 2, |_| false, @@ -64,7 +64,7 @@ impl Segment for StorageHistory { .map(|block_number| if done { block_number } else { block_number.saturating_sub(1) }) .unwrap_or(range_end); - let (processed, pruned_indices) = prune_history_indices::( + let (processed, pruned_indices) = prune_history_indices::( provider, last_changeset_pruned_block, |a, b| a.address == b.address && a.sharded_key.key == b.sharded_key.key, @@ -90,11 +90,11 @@ mod tests { use reth_db::{tables, BlockNumberList}; use reth_interfaces::test_utils::{ generators, - generators::{random_block_range, random_changeset_range, random_eoa_account_range}, + generators::{random_block_range, random_changeset_range, random_eoa_accounts}, }; use reth_primitives::{BlockNumber, PruneCheckpoint, PruneMode, PruneSegment, B256}; use reth_provider::PruneCheckpointReader; - use reth_stages::test_utils::TestStageDB; + use reth_stages::test_utils::{StorageKind, TestStageDB}; use std::{collections::BTreeMap, ops::AddAssign}; #[test] @@ -103,22 +103,21 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, 0..=5000, B256::ZERO, 0..1); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Database(None)).expect("insert blocks"); - let accounts = - random_eoa_account_range(&mut rng, 0..2).into_iter().collect::>(); + let accounts = random_eoa_accounts(&mut rng, 2).into_iter().collect::>(); let (changesets, _) = random_changeset_range( &mut rng, blocks.iter(), accounts.into_iter().map(|(addr, acc)| (addr, (acc, Vec::new()))), - 2..3, + 1..2, 1..2, ); db.insert_changesets(changesets.clone(), None).expect("insert changesets"); db.insert_history(changesets.clone(), None).expect("insert history"); - let storage_occurrences = db.table::().unwrap().into_iter().fold( + let storage_occurrences = db.table::().unwrap().into_iter().fold( BTreeMap::<_, usize>::new(), |mut map, (key, _)| { map.entry((key.address, key.sharded_key.key)).or_default().add_assign(1); @@ -128,11 +127,11 @@ mod tests { assert!(storage_occurrences.into_iter().any(|(_, occurrences)| occurrences > 1)); assert_eq!( - db.table::().unwrap().len(), + db.table::().unwrap().len(), changesets.iter().flatten().flat_map(|(_, _, entries)| entries).count() ); - let original_shards = db.table::().unwrap(); + let original_shards = db.table::().unwrap(); let test_prune = |to_block: BlockNumber, run: usize, expected_result: (bool, usize)| { let prune_mode = PruneMode::Before(to_block); @@ -144,7 +143,7 @@ mod tests { .get_prune_checkpoint(PruneSegment::StorageHistory) .unwrap(), to_block, - delete_limit: 2000, + delete_limit: 1000, }; let segment = StorageHistory::new(prune_mode); @@ -208,19 +207,19 @@ mod tests { ); assert_eq!( - db.table::().unwrap().len(), + db.table::().unwrap().len(), pruned_changesets.values().flatten().count() ); - let actual_shards = db.table::().unwrap(); + let actual_shards = db.table::().unwrap(); let expected_shards = 
original_shards .iter() .filter(|(key, _)| key.sharded_key.highest_block_number > last_pruned_block_number) .map(|(key, blocks)| { let new_blocks = blocks - .iter(0) - .skip_while(|block| *block <= last_pruned_block_number as usize) + .iter() + .skip_while(|block| *block <= last_pruned_block_number) .collect::>(); (key.clone(), BlockNumberList::new_pre_sorted(new_blocks)) }) @@ -242,8 +241,8 @@ mod tests { ); }; - test_prune(998, 1, (false, 1000)); - test_prune(998, 2, (true, 998)); - test_prune(1400, 3, (true, 804)); + test_prune(998, 1, (false, 500)); + test_prune(998, 2, (true, 499)); + test_prune(1200, 3, (true, 202)); } } diff --git a/crates/prune/src/segments/transaction_lookup.rs b/crates/prune/src/segments/transaction_lookup.rs index 342a764a68a6..379431448349 100644 --- a/crates/prune/src/segments/transaction_lookup.rs +++ b/crates/prune/src/segments/transaction_lookup.rs @@ -61,7 +61,7 @@ impl Segment for TransactionLookup { } let mut last_pruned_transaction = None; - let (pruned, _) = provider.prune_table_with_iterator::( + let (pruned, _) = provider.prune_table_with_iterator::( hashes, input.delete_limit, |row| { @@ -104,7 +104,7 @@ mod tests { use reth_interfaces::test_utils::{generators, generators::random_block_range}; use reth_primitives::{BlockNumber, PruneCheckpoint, PruneMode, PruneSegment, TxNumber, B256}; use reth_provider::PruneCheckpointReader; - use reth_stages::test_utils::TestStageDB; + use reth_stages::test_utils::{StorageKind, TestStageDB}; use std::ops::Sub; #[test] @@ -113,7 +113,7 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, 1..=10, B256::ZERO, 2..3); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Database(None)).expect("insert blocks"); let mut tx_hash_numbers = Vec::new(); for block in &blocks { @@ -129,7 +129,7 @@ mod tests { ); assert_eq!( db.table::().unwrap().len(), - db.table::().unwrap().len() + db.table::().unwrap().len() ); let test_prune = |to_block: BlockNumber, expected_result: (bool, usize)| { @@ -197,7 +197,7 @@ mod tests { last_pruned_block_number.checked_sub(if result.done { 0 } else { 1 }); assert_eq!( - db.table::().unwrap().len(), + db.table::().unwrap().len(), tx_hash_numbers.len() - (last_pruned_tx_number + 1) ); assert_eq!( diff --git a/crates/prune/src/segments/transactions.rs b/crates/prune/src/segments/transactions.rs index 7155cd8888ad..3c2ac425536b 100644 --- a/crates/prune/src/segments/transactions.rs +++ b/crates/prune/src/segments/transactions.rs @@ -80,7 +80,7 @@ mod tests { use reth_interfaces::test_utils::{generators, generators::random_block_range}; use reth_primitives::{BlockNumber, PruneCheckpoint, PruneMode, PruneSegment, TxNumber, B256}; use reth_provider::PruneCheckpointReader; - use reth_stages::test_utils::TestStageDB; + use reth_stages::test_utils::{StorageKind, TestStageDB}; use std::ops::Sub; #[test] @@ -89,7 +89,7 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, 1..=100, B256::ZERO, 2..3); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Database(None)).expect("insert blocks"); let transactions = blocks.iter().flat_map(|block| &block.body).collect::>(); diff --git a/crates/rpc/rpc-types-compat/src/transaction/mod.rs b/crates/rpc/rpc-types-compat/src/transaction/mod.rs index 5ecad24dca8c..c29359ec7e28 100644 --- a/crates/rpc/rpc-types-compat/src/transaction/mod.rs +++ 
b/crates/rpc/rpc-types-compat/src/transaction/mod.rs @@ -124,7 +124,7 @@ fn fill( nonce: U64::from(signed_tx.nonce()), from: signer, to, - value: signed_tx.value().into(), + value: signed_tx.value(), gas_price, max_fee_per_gas, max_priority_fee_per_gas: signed_tx.max_priority_fee_per_gas().map(U128::from), @@ -200,7 +200,7 @@ pub fn transaction_to_call_request(tx: TransactionSignedEcRecovered) -> Transact max_fee_per_gas: max_fee_per_gas.map(U256::from), max_priority_fee_per_gas: max_priority_fee_per_gas.map(U256::from), gas: Some(U256::from(gas)), - value: Some(value.into()), + value: Some(value), input: TransactionInput::new(input), nonce: Some(U64::from(nonce)), chain_id: chain_id.map(U64::from), diff --git a/crates/rpc/rpc-types-compat/src/transaction/typed.rs b/crates/rpc/rpc-types-compat/src/transaction/typed.rs index 55d316040d4e..2b9b32471465 100644 --- a/crates/rpc/rpc-types-compat/src/transaction/typed.rs +++ b/crates/rpc/rpc-types-compat/src/transaction/typed.rs @@ -17,7 +17,7 @@ pub fn to_primitive_transaction( gas_price: tx.gas_price.to(), gas_limit: tx.gas_limit.try_into().ok()?, to: to_primitive_transaction_kind(tx.kind), - value: tx.value.into(), + value: tx.value, input: tx.input, }), TypedTransactionRequest::EIP2930(tx) => Transaction::Eip2930(TxEip2930 { @@ -26,7 +26,7 @@ pub fn to_primitive_transaction( gas_price: tx.gas_price.to(), gas_limit: tx.gas_limit.try_into().ok()?, to: to_primitive_transaction_kind(tx.kind), - value: tx.value.into(), + value: tx.value, input: tx.input, access_list: tx.access_list.into(), }), @@ -36,7 +36,7 @@ pub fn to_primitive_transaction( max_fee_per_gas: tx.max_fee_per_gas.to(), gas_limit: tx.gas_limit.try_into().ok()?, to: to_primitive_transaction_kind(tx.kind), - value: tx.value.into(), + value: tx.value, input: tx.input, access_list: tx.access_list.into(), max_priority_fee_per_gas: tx.max_priority_fee_per_gas.to(), @@ -48,7 +48,7 @@ pub fn to_primitive_transaction( max_fee_per_gas: tx.max_fee_per_gas.to(), max_priority_fee_per_gas: tx.max_priority_fee_per_gas.to(), to: to_primitive_transaction_kind(tx.kind), - value: tx.value.into(), + value: tx.value, access_list: tx.access_list.into(), blob_versioned_hashes: tx.blob_versioned_hashes, max_fee_per_blob_gas: tx.max_fee_per_blob_gas.to(), diff --git a/crates/rpc/rpc/Cargo.toml b/crates/rpc/rpc/Cargo.toml index 517c7ca15742..566fa90eec36 100644 --- a/crates/rpc/rpc/Cargo.toml +++ b/crates/rpc/rpc/Cargo.toml @@ -81,7 +81,7 @@ tracing.workspace = true tracing-futures = "0.2" schnellru.workspace = true futures.workspace = true -derive_more = "0.99" +derive_more.workspace = true lazy_static = "*" [dev-dependencies] diff --git a/crates/rpc/rpc/src/debug.rs b/crates/rpc/rpc/src/debug.rs index ada59386d6ac..b5105dcd3fc7 100644 --- a/crates/rpc/rpc/src/debug.rs +++ b/crates/rpc/rpc/src/debug.rs @@ -10,13 +10,12 @@ use crate::{ result::{internal_rpc_err, ToRpcResult}, BlockingTaskGuard, EthApiSpec, }; -use alloy_primitives::U256; use alloy_rlp::{Decodable, Encodable}; use async_trait::async_trait; use jsonrpsee::core::RpcResult; use reth_primitives::{ revm::env::tx_env_with_recovered, Address, Block, BlockId, BlockNumberOrTag, Bytes, - TransactionSignedEcRecovered, Withdrawals, B256, + TransactionSignedEcRecovered, Withdrawals, B256, U256, }; use reth_provider::{ BlockReaderIdExt, ChainSpecProvider, HeaderProvider, StateProviderBox, TransactionVariant, diff --git a/crates/rpc/rpc/src/eth/api/transactions.rs b/crates/rpc/rpc/src/eth/api/transactions.rs index c96d2ee5d4b6..397407b924ac 100644 --- 
a/crates/rpc/rpc/src/eth/api/transactions.rs +++ b/crates/rpc/rpc/src/eth/api/transactions.rs @@ -442,7 +442,7 @@ where if let Some(tx) = self.pool().get_pooled_transaction_element(hash).map(|tx| tx.envelope_encoded()) { - return Ok(Some(tx)); + return Ok(Some(tx)) } self.on_blocking_task(|this| async move { diff --git a/crates/rpc/rpc/src/txpool.rs b/crates/rpc/rpc/src/txpool.rs index c61793f2fdf4..f9def07b1b97 100644 --- a/crates/rpc/rpc/src/txpool.rs +++ b/crates/rpc/rpc/src/txpool.rs @@ -96,7 +96,7 @@ where key, TxpoolInspectSummary { to: tx.to(), - value: tx.value().into(), + value: tx.value(), gas: U256::from(tx.gas_limit()), gas_price: U256::from(tx.transaction.max_fee_per_gas()), }, diff --git a/crates/snapshot/README.md b/crates/snapshot/README.md deleted file mode 100644 index 6056bbf9f0a0..000000000000 --- a/crates/snapshot/README.md +++ /dev/null @@ -1,88 +0,0 @@ -# Snapshot - -## Overview - -Data that has reached a finalized state and won't undergo further changes (essentially frozen) should be read without concerns of modification. This makes it unsuitable for traditional databases. - -This crate aims to copy this data from the current database to multiple static files, aggregated by block ranges. At every 500_000th block new static files are created. - -Below are two diagrams illustrating the processes of creating static files (custom format: `NippyJar`) and querying them. A glossary is also provided to explain the different (linked) components involved in these processes. - -
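As a rough illustration of the fixed 500_000-block aggregation described above, the sketch below maps a block number to the inclusive block range of the static file that would hold it. It is a minimal, standalone example; the constant and function names are illustrative and not part of this crate's API.

```rust
use std::ops::RangeInclusive;

/// Blocks are grouped into fixed-size ranges; mirrors the 500_000-block interval
/// mentioned above (illustrative constant, not the crate's).
const BLOCKS_PER_STATIC_FILE: u64 = 500_000;

/// Returns the inclusive block range of the file that would contain `block`.
fn static_file_range(block: u64) -> RangeInclusive<u64> {
    let start = (block / BLOCKS_PER_STATIC_FILE) * BLOCKS_PER_STATIC_FILE;
    start..=start + BLOCKS_PER_STATIC_FILE - 1
}

fn main() {
    // Block 499_999 falls into the first range; block 500_000 starts a new one.
    assert_eq!(static_file_range(499_999), 0..=499_999);
    assert_eq!(static_file_range(500_000), 500_000..=999_999);
}
```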
- Creation diagram (Snapshotter) - -```mermaid -graph TD; - I("BLOCK_HEIGHT % 500_000 == 0")--triggers-->SP(Snapshotter) - SP --> |triggers| SH["create_snapshot(block_range, SnapshotSegment::Headers)"] - SP --> |triggers| ST["create_snapshot(block_range, SnapshotSegment::Transactions)"] - SP --> |triggers| SR["create_snapshot(block_range, SnapshotSegment::Receipts)"] - SP --> |triggers| ETC["create_snapshot(block_range, ...)"] - SH --> CS["create_snapshot::< T >(DatabaseCursor)"] - ST --> CS - SR --> CS - ETC --> CS - CS --> |create| IF(NippyJar::InclusionFilters) - CS -- iterates --> DC(DatabaseCursor) -->HN{HasNext} - HN --> |true| NJC(NippyJar::Compression) - NJC --> HN - NJC --store--> NJ - HN --> |false| NJ - IF --store--> NJ(NippyJar) - NJ --freeze--> F(File) - F--"on success"--> SP1(Snapshotter) - SP1 --"sends BLOCK_HEIGHT"--> HST(HighestSnapshotTracker) - HST --"read by"-->Pruner - HST --"read by"-->DatabaseProvider - HST --"read by"-->SnapsotProvider - HST --"read by"-->ProviderFactory - -``` -
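The `HighestSnapshotTracker` edges at the bottom of the diagram are backed by a `tokio::sync::watch` channel (see the deleted `snapshotter.rs` further below). A minimal sketch of that announce/observe pattern, using a plain `Option<u64>` in place of the crate's `HighestSnapshots` type:

```rust
use tokio::sync::watch;

// Standalone sketch; no async runtime is needed because `send` and `borrow` are synchronous.
fn main() {
    // The snapshotter owns the sender; every interested component keeps a receiver clone.
    let (notifier, tracker) = watch::channel::<Option<u64>>(None);
    let pruner_view = tracker.clone();
    assert_eq!(*pruner_view.borrow(), None);

    // After freezing files up to block 499_999, the snapshotter announces the new height.
    notifier.send(Some(499_999)).expect("at least one receiver is still alive");
    assert_eq!(*pruner_view.borrow(), Some(499_999));
}
```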
- - -
- Query diagram (Provider) - -```mermaid -graph TD; - RPC-->P - P("Provider::header(block_number)")-->PF(ProviderFactory) - PF--shares-->SP1("Arc(SnapshotProvider)") - SP1--shares-->PD(DatabaseProvider) - PF--creates-->PD - PD--check `HighestSnapshotTracker`-->PD - PD-->DC1{block_number > highest snapshot block} - DC1 --> |true| PD1("DatabaseProvider::header(block_number)") - DC1 --> |false| ASP("SnapshotProvider::header(block_number)") - PD1 --> MDBX - ASP --find correct jar and creates--> JP("SnapshotJarProvider::header(block_number)") - JP --"creates"-->SC(SnapshotCursor) - SC --".get_one< HeaderMask< Header > >(number)"--->NJC("NippyJarCursor") - NJC--".row_by_number(row_index, mask)"-->NJ[NippyJar] - NJ--"&[u8]"-->NJC - NJC--"&[u8]"-->SC - SC--"Header"--> JP - JP--"Header"--> ASP -``` -
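The decision node in the query diagram reduces to a comparison against the highest snapshotted block: anything above it is read from the database, anything at or below it from the snapshot files. A self-contained sketch of that routing rule (the types and names are illustrative, not the provider API):

```rust
/// Which backend serves a read, per the decision node in the diagram.
#[derive(Debug, PartialEq)]
enum Source {
    Database,
    Snapshot,
}

fn route(block_number: u64, highest_snapshot_block: Option<u64>) -> Source {
    match highest_snapshot_block {
        // At or below the highest snapshotted block: served from snapshot files.
        Some(highest) if block_number <= highest => Source::Snapshot,
        // Above it, or no snapshots exist yet: served from the database.
        _ => Source::Database,
    }
}

fn main() {
    assert_eq!(route(10, Some(499_999)), Source::Snapshot);
    assert_eq!(route(500_000, Some(499_999)), Source::Database);
    assert_eq!(route(0, None), Source::Database);
}
```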
- - -### Glossary -In descending order of abstraction hierarchy: - -[`Snapshotter`](../../crates/snapshot/src/snapshotter.rs#L20): A `reth` background service that **copies** data from the database to new snapshot files when the block height reaches a certain threshold (e.g., `500_000th`). Upon completion, it dispatches a notification about the higher snapshotted block to `HighestSnapshotTracker` channel. **It DOES NOT remove data from the database.** - -[`HighestSnapshotTracker`](../../crates/snapshot/src/snapshotter.rs#L22): A channel utilized by `Snapshotter` to announce the newest snapshot block to all components with a listener: `Pruner` (to know which additional tables can be pruned) and `DatabaseProvider` (to know which data can be queried from the snapshots). - -[`SnapshotProvider`](../../crates/storage/provider/src/providers/snapshot/manager.rs#L15) A provider similar to `DatabaseProvider`, **managing all existing snapshot files** and selecting the optimal one (by range and segment type) to fulfill a request. **A single instance is shared across all components and should be instantiated only once within `ProviderFactory`**. An immutable reference is given everytime `ProviderFactory` creates a new `DatabaseProvider`. - -[`SnapshotJarProvider`](../../crates/storage/provider/src/providers/snapshot/jar.rs#L42) A provider similar to `DatabaseProvider` that provides access to a **single snapshot file**. - -[`SnapshotCursor`](../../crates/storage/db/src/snapshot/cursor.rs#L12) An elevated abstraction of `NippyJarCursor` for simplified access. It associates the bitmasks with type decoding. For instance, `cursor.get_two::>(tx_number)` would yield `Tx` and `Signature`, eliminating the need to manage masks or invoke a decoder/decompressor. - -[`SnapshotSegment`](../../crates/primitives/src/snapshot/segment.rs#L10) Each snapshot file only contains data of a specific segment, e.g., `Headers`, `Transactions`, or `Receipts`. - -[`NippyJarCursor`](../../crates/storage/nippy-jar/src/cursor.rs#L12) Accessor of data in a `NippyJar` file. It enables queries either by row number (e.g., block number 1) or by a predefined key not part of the file (e.g., transaction hashes). If a file has multiple columns (e.g., `Tx | TxSender | Signature`), and one wishes to access only one of the column values, this can be accomplished by bitmasks. (e.g., for `TxSender`, the mask would be `0b010`). - -[`NippyJar`](../../crates/storage/nippy-jar/src/lib.rs#57) A create-only file format. No data can be appended after creation. It supports multiple columns, compression (e.g., Zstd (with and without dictionaries), lz4, uncompressed) and inclusion filters (e.g., cuckoo filter: `is hash X part of this dataset`). Snapshots are organized by block ranges. (e.g., `TransactionSnapshot_499_999.jar` contains a transaction per row for all transactions from block `0` to block `499_999`). For more check the struct documentation. diff --git a/crates/snapshot/src/error.rs b/crates/snapshot/src/error.rs deleted file mode 100644 index 302803835cf5..000000000000 --- a/crates/snapshot/src/error.rs +++ /dev/null @@ -1,25 +0,0 @@ -use reth_db::DatabaseError; -use reth_interfaces::RethError; -use reth_provider::ProviderError; -use thiserror::Error; - -/// Error returned by [crate::Snapshotter::run] -#[derive(Error, Debug)] -/// Errors that can occur during snapshotting. -pub enum SnapshotterError { - /// Inconsistent data error. - #[error("inconsistent data: {0}")] - InconsistentData(&'static str), - - /// Error related to the interface. 
- #[error(transparent)] - Interface(#[from] RethError), - - /// Error related to the database. - #[error(transparent)] - Database(#[from] DatabaseError), - - /// Error related to the provider. - #[error(transparent)] - Provider(#[from] ProviderError), -} diff --git a/crates/snapshot/src/segments/headers.rs b/crates/snapshot/src/segments/headers.rs deleted file mode 100644 index 0a524e86c3b9..000000000000 --- a/crates/snapshot/src/segments/headers.rs +++ /dev/null @@ -1,98 +0,0 @@ -use crate::segments::{prepare_jar, Segment, SegmentHeader}; -use reth_db::{ - cursor::DbCursorRO, database::Database, snapshot::create_snapshot_T1_T2_T3, tables, - transaction::DbTx, RawKey, RawTable, -}; -use reth_interfaces::provider::ProviderResult; -use reth_primitives::{ - snapshot::{Compression, Filters, SegmentConfig}, - BlockNumber, SnapshotSegment, -}; -use reth_provider::DatabaseProviderRO; -use std::{ops::RangeInclusive, path::Path}; - -/// Snapshot segment responsible for [SnapshotSegment::Headers] part of data. -#[derive(Debug)] -pub struct Headers { - config: SegmentConfig, -} - -impl Headers { - /// Creates new instance of [Headers] snapshot segment. - pub fn new(compression: Compression, filters: Filters) -> Self { - Self { config: SegmentConfig { compression, filters } } - } -} - -impl Default for Headers { - fn default() -> Self { - Self { config: SnapshotSegment::Headers.config() } - } -} - -impl Segment for Headers { - fn segment(&self) -> SnapshotSegment { - SnapshotSegment::Headers - } - - fn snapshot( - &self, - provider: &DatabaseProviderRO, - directory: impl AsRef, - range: RangeInclusive, - ) -> ProviderResult<()> { - let range_len = range.clone().count(); - let mut jar = prepare_jar::( - provider, - directory, - self.segment(), - self.config, - range.clone(), - range_len, - || { - Ok([ - self.dataset_for_compression::( - provider, &range, range_len, - )?, - self.dataset_for_compression::( - provider, &range, range_len, - )?, - self.dataset_for_compression::( - provider, &range, range_len, - )?, - ]) - }, - )?; - - // Generate list of hashes for filters & PHF - let mut cursor = provider.tx_ref().cursor_read::>()?; - let mut hashes = None; - if self.config.filters.has_filters() { - hashes = Some( - cursor - .walk(Some(RawKey::from(*range.start())))? - .take(range_len) - .map(|row| row.map(|(_key, value)| value.into_value()).map_err(|e| e.into())), - ); - } - - create_snapshot_T1_T2_T3::< - tables::Headers, - tables::HeaderTD, - tables::CanonicalHeaders, - BlockNumber, - SegmentHeader, - >( - provider.tx_ref(), - range, - None, - // We already prepared the dictionary beforehand - None::>>>, - hashes, - range_len, - &mut jar, - )?; - - Ok(()) - } -} diff --git a/crates/snapshot/src/segments/mod.rs b/crates/snapshot/src/segments/mod.rs deleted file mode 100644 index 68b1b81b0999..000000000000 --- a/crates/snapshot/src/segments/mod.rs +++ /dev/null @@ -1,100 +0,0 @@ -//! Snapshot segment implementations and utilities. 
- -mod transactions; -pub use transactions::Transactions; - -mod headers; -pub use headers::Headers; - -mod receipts; -pub use receipts::Receipts; - -use reth_db::{ - cursor::DbCursorRO, database::Database, table::Table, transaction::DbTx, RawKey, RawTable, -}; -use reth_interfaces::provider::ProviderResult; -use reth_nippy_jar::NippyJar; -use reth_primitives::{ - snapshot::{ - Compression, Filters, InclusionFilter, PerfectHashingFunction, SegmentConfig, SegmentHeader, - }, - BlockNumber, SnapshotSegment, -}; -use reth_provider::{DatabaseProviderRO, TransactionsProviderExt}; -use std::{ops::RangeInclusive, path::Path}; - -pub(crate) type Rows = [Vec>; COLUMNS]; - -/// A segment represents a snapshotting of some portion of the data. -pub trait Segment: Default { - /// Snapshot data using the provided range. The `directory` parameter determines the snapshot - /// file's save location. - fn snapshot( - &self, - provider: &DatabaseProviderRO, - directory: impl AsRef, - range: RangeInclusive, - ) -> ProviderResult<()>; - - /// Returns this struct's [`SnapshotSegment`]. - fn segment(&self) -> SnapshotSegment; - - /// Generates the dataset to train a zstd dictionary with the most recent rows (at most 1000). - fn dataset_for_compression>( - &self, - provider: &DatabaseProviderRO, - range: &RangeInclusive, - range_len: usize, - ) -> ProviderResult>> { - let mut cursor = provider.tx_ref().cursor_read::>()?; - Ok(cursor - .walk_back(Some(RawKey::from(*range.end())))? - .take(range_len.min(1000)) - .map(|row| row.map(|(_key, value)| value.into_value()).expect("should exist")) - .collect::>()) - } -} - -/// Returns a [`NippyJar`] according to the desired configuration. The `directory` parameter -/// determines the snapshot file's save location. -pub(crate) fn prepare_jar( - provider: &DatabaseProviderRO, - directory: impl AsRef, - segment: SnapshotSegment, - segment_config: SegmentConfig, - block_range: RangeInclusive, - total_rows: usize, - prepare_compression: impl Fn() -> ProviderResult>, -) -> ProviderResult> { - let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; - let mut nippy_jar = NippyJar::new( - COLUMNS, - &directory.as_ref().join(segment.filename(&block_range, &tx_range).as_str()), - SegmentHeader::new(block_range, tx_range, segment), - ); - - nippy_jar = match segment_config.compression { - Compression::Lz4 => nippy_jar.with_lz4(), - Compression::Zstd => nippy_jar.with_zstd(false, 0), - Compression::ZstdWithDictionary => { - let dataset = prepare_compression()?; - - nippy_jar = nippy_jar.with_zstd(true, 5_000_000); - nippy_jar.prepare_compression(dataset.to_vec())?; - nippy_jar - } - Compression::Uncompressed => nippy_jar, - }; - - if let Filters::WithFilters(inclusion_filter, phf) = segment_config.filters { - nippy_jar = match inclusion_filter { - InclusionFilter::Cuckoo => nippy_jar.with_cuckoo_filter(total_rows), - }; - nippy_jar = match phf { - PerfectHashingFunction::Fmph => nippy_jar.with_fmph(), - PerfectHashingFunction::GoFmph => nippy_jar.with_gofmph(), - }; - } - - Ok(nippy_jar) -} diff --git a/crates/snapshot/src/segments/receipts.rs b/crates/snapshot/src/segments/receipts.rs deleted file mode 100644 index 5c5a48112976..000000000000 --- a/crates/snapshot/src/segments/receipts.rs +++ /dev/null @@ -1,84 +0,0 @@ -use crate::segments::{prepare_jar, Segment}; -use reth_db::{database::Database, snapshot::create_snapshot_T1, tables}; -use reth_interfaces::provider::ProviderResult; -use reth_primitives::{ - snapshot::{Compression, Filters, SegmentConfig, 
SegmentHeader}, - BlockNumber, SnapshotSegment, TxNumber, -}; -use reth_provider::{DatabaseProviderRO, TransactionsProviderExt}; -use std::{ops::RangeInclusive, path::Path}; - -/// Snapshot segment responsible for [SnapshotSegment::Receipts] part of data. -#[derive(Debug)] -pub struct Receipts { - config: SegmentConfig, -} - -impl Receipts { - /// Creates new instance of [Receipts] snapshot segment. - pub fn new(compression: Compression, filters: Filters) -> Self { - Self { config: SegmentConfig { compression, filters } } - } -} - -impl Default for Receipts { - fn default() -> Self { - Self { config: SnapshotSegment::Receipts.config() } - } -} - -impl Segment for Receipts { - fn segment(&self) -> SnapshotSegment { - SnapshotSegment::Receipts - } - - fn snapshot( - &self, - provider: &DatabaseProviderRO, - directory: impl AsRef, - block_range: RangeInclusive, - ) -> ProviderResult<()> { - let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; - let tx_range_len = tx_range.clone().count(); - - let mut jar = prepare_jar::( - provider, - directory, - self.segment(), - self.config, - block_range, - tx_range_len, - || { - Ok([self.dataset_for_compression::( - provider, - &tx_range, - tx_range_len, - )?]) - }, - )?; - - // Generate list of hashes for filters & PHF - let mut hashes = None; - if self.config.filters.has_filters() { - hashes = Some( - provider - .transaction_hashes_by_range(*tx_range.start()..(*tx_range.end() + 1))? - .into_iter() - .map(|(tx, _)| Ok(tx)), - ); - } - - create_snapshot_T1::( - provider.tx_ref(), - tx_range, - None, - // We already prepared the dictionary beforehand - None::>>>, - hashes, - tx_range_len, - &mut jar, - )?; - - Ok(()) - } -} diff --git a/crates/snapshot/src/segments/transactions.rs b/crates/snapshot/src/segments/transactions.rs deleted file mode 100644 index ea936bd95bff..000000000000 --- a/crates/snapshot/src/segments/transactions.rs +++ /dev/null @@ -1,84 +0,0 @@ -use crate::segments::{prepare_jar, Segment}; -use reth_db::{database::Database, snapshot::create_snapshot_T1, tables}; -use reth_interfaces::provider::ProviderResult; -use reth_primitives::{ - snapshot::{Compression, Filters, SegmentConfig, SegmentHeader}, - BlockNumber, SnapshotSegment, TxNumber, -}; -use reth_provider::{DatabaseProviderRO, TransactionsProviderExt}; -use std::{ops::RangeInclusive, path::Path}; - -/// Snapshot segment responsible for [SnapshotSegment::Transactions] part of data. -#[derive(Debug)] -pub struct Transactions { - config: SegmentConfig, -} - -impl Transactions { - /// Creates new instance of [Transactions] snapshot segment. 
- pub fn new(compression: Compression, filters: Filters) -> Self { - Self { config: SegmentConfig { compression, filters } } - } -} - -impl Default for Transactions { - fn default() -> Self { - Self { config: SnapshotSegment::Transactions.config() } - } -} - -impl Segment for Transactions { - fn segment(&self) -> SnapshotSegment { - SnapshotSegment::Transactions - } - - fn snapshot( - &self, - provider: &DatabaseProviderRO, - directory: impl AsRef, - block_range: RangeInclusive, - ) -> ProviderResult<()> { - let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; - let tx_range_len = tx_range.clone().count(); - - let mut jar = prepare_jar::( - provider, - directory, - self.segment(), - self.config, - block_range, - tx_range_len, - || { - Ok([self.dataset_for_compression::( - provider, - &tx_range, - tx_range_len, - )?]) - }, - )?; - - // Generate list of hashes for filters & PHF - let mut hashes = None; - if self.config.filters.has_filters() { - hashes = Some( - provider - .transaction_hashes_by_range(*tx_range.start()..(*tx_range.end() + 1))? - .into_iter() - .map(|(tx, _)| Ok(tx)), - ); - } - - create_snapshot_T1::( - provider.tx_ref(), - tx_range, - None, - // We already prepared the dictionary beforehand - None::>>>, - hashes, - tx_range_len, - &mut jar, - )?; - - Ok(()) - } -} diff --git a/crates/snapshot/src/snapshotter.rs b/crates/snapshot/src/snapshotter.rs deleted file mode 100644 index 993c4c140996..000000000000 --- a/crates/snapshot/src/snapshotter.rs +++ /dev/null @@ -1,397 +0,0 @@ -//! Support for snapshotting. - -use crate::{segments, segments::Segment, SnapshotterError}; -use reth_db::{database::Database, snapshot::iter_snapshots}; -use reth_interfaces::{RethError, RethResult}; -use reth_primitives::{snapshot::HighestSnapshots, BlockNumber, TxNumber}; -use reth_provider::{BlockReader, DatabaseProviderRO, ProviderFactory, TransactionsProviderExt}; -use std::{ - collections::HashMap, - ops::RangeInclusive, - path::{Path, PathBuf}, -}; -use tokio::sync::watch; -use tracing::warn; - -/// Result of [Snapshotter::run] execution. -pub type SnapshotterResult = Result; - -/// The snapshotter type itself with the result of [Snapshotter::run] -pub type SnapshotterWithResult = (Snapshotter, SnapshotterResult); - -/// Snapshots are initially created in `{...}/datadir/snapshots/temp` and moved once finished. This -/// directory is cleaned up on every booting up of the node. -const TEMPORARY_SUBDIRECTORY: &str = "temp"; - -/// Snapshotting routine. Main snapshotting logic happens in [Snapshotter::run]. -#[derive(Debug)] -pub struct Snapshotter { - /// Provider factory - provider_factory: ProviderFactory, - /// Directory where snapshots are located - snapshots_path: PathBuf, - /// Highest snapshotted block numbers for each segment - highest_snapshots: HighestSnapshots, - /// Channel sender to notify other components of the new highest snapshots - highest_snapshots_notifier: watch::Sender>, - /// Channel receiver to be cloned and shared that already comes with the newest value - highest_snapshots_tracker: HighestSnapshotsTracker, - /// Block interval after which the snapshot is taken. - block_interval: u64, -} - -/// Tracker for the latest [`HighestSnapshots`] value. -pub type HighestSnapshotsTracker = watch::Receiver>; - -/// Snapshot targets, per data part, measured in [`BlockNumber`] and [`TxNumber`], if applicable. 
-#[derive(Debug, Clone, Eq, PartialEq)] -pub struct SnapshotTargets { - headers: Option>, - receipts: Option<(RangeInclusive, RangeInclusive)>, - transactions: Option<(RangeInclusive, RangeInclusive)>, -} - -impl SnapshotTargets { - /// Returns `true` if any of the targets are [Some]. - pub fn any(&self) -> bool { - self.headers.is_some() || self.receipts.is_some() || self.transactions.is_some() - } - - /// Returns `true` if all targets are either [None] or multiple of `block_interval`. - fn is_multiple_of_block_interval(&self, block_interval: u64) -> bool { - [ - self.headers.as_ref(), - self.receipts.as_ref().map(|(blocks, _)| blocks), - self.transactions.as_ref().map(|(blocks, _)| blocks), - ] - .iter() - .all(|blocks| blocks.map_or(true, |blocks| (blocks.end() + 1) % block_interval == 0)) - } - - // Returns `true` if all targets are either [`None`] or has beginning of the range equal to the - // highest snapshot. - fn is_contiguous_to_highest_snapshots(&self, snapshots: HighestSnapshots) -> bool { - [ - (self.headers.as_ref(), snapshots.headers), - (self.receipts.as_ref().map(|(blocks, _)| blocks), snapshots.receipts), - (self.transactions.as_ref().map(|(blocks, _)| blocks), snapshots.transactions), - ] - .iter() - .all(|(target, highest)| { - target.map_or(true, |block_number| { - highest.map_or(*block_number.start() == 0, |previous_block_number| { - *block_number.start() == previous_block_number + 1 - }) - }) - }) - } -} - -impl Snapshotter { - /// Creates a new [Snapshotter]. - pub fn new( - provider_factory: ProviderFactory, - snapshots_path: impl AsRef, - block_interval: u64, - ) -> RethResult { - let (highest_snapshots_notifier, highest_snapshots_tracker) = watch::channel(None); - - let mut snapshotter = Self { - provider_factory, - snapshots_path: snapshots_path.as_ref().into(), - highest_snapshots: HighestSnapshots::default(), - highest_snapshots_notifier, - highest_snapshots_tracker, - block_interval, - }; - - snapshotter.create_directory()?; - snapshotter.update_highest_snapshots_tracker()?; - - Ok(snapshotter) - } - - /// Ensures the snapshots directory and its temporary subdirectory are properly set up. - /// - /// This function performs the following actions: - /// 1. If `datadir/snapshots` does not exist, it creates it. - /// 2. Ensures `datadir/snapshots/temp` exists and is empty. - /// - /// The `temp` subdirectory is where snapshots are initially created before being - /// moved to their final location within `datadir/snapshots`. - fn create_directory(&self) -> RethResult<()> { - let temporary_path = self.snapshots_path.join(TEMPORARY_SUBDIRECTORY); - - if !self.snapshots_path.exists() { - reth_primitives::fs::create_dir_all(&self.snapshots_path)?; - } else if temporary_path.exists() { - reth_primitives::fs::remove_dir_all(&temporary_path)?; - } - - reth_primitives::fs::create_dir_all(temporary_path)?; - - Ok(()) - } - - #[cfg(test)] - fn set_highest_snapshots_from_targets(&mut self, targets: &SnapshotTargets) { - if let Some(block_number) = &targets.headers { - self.highest_snapshots.headers = Some(*block_number.end()); - } - if let Some((block_number, _)) = &targets.receipts { - self.highest_snapshots.receipts = Some(*block_number.end()); - } - if let Some((block_number, _)) = &targets.transactions { - self.highest_snapshots.transactions = Some(*block_number.end()); - } - } - - /// Looks into the snapshot directory to find the highest snapshotted block of each segment, and - /// notifies every tracker. 
- fn update_highest_snapshots_tracker(&mut self) -> RethResult<()> { - // It walks over the directory and parses the snapshot filenames extracting - // `SnapshotSegment` and their inclusive range. It then takes the maximum block - // number for each specific segment. - for (segment, ranges) in - iter_snapshots(&self.snapshots_path).map_err(|err| RethError::Provider(err.into()))? - { - for (block_range, _) in ranges { - let max_segment_block = self.highest_snapshots.as_mut(segment); - if max_segment_block.map_or(true, |block| block < *block_range.end()) { - *max_segment_block = Some(*block_range.end()); - } - } - } - - let _ = self.highest_snapshots_notifier.send(Some(self.highest_snapshots)).map_err(|_| { - warn!(target: "snapshot", "Highest snapshots channel closed"); - }); - - Ok(()) - } - - /// Returns a new [`HighestSnapshotsTracker`]. - pub fn highest_snapshot_receiver(&self) -> HighestSnapshotsTracker { - self.highest_snapshots_tracker.clone() - } - - /// Run the snapshotter - pub fn run(&mut self, targets: SnapshotTargets) -> SnapshotterResult { - debug_assert!(targets.is_multiple_of_block_interval(self.block_interval)); - debug_assert!(targets.is_contiguous_to_highest_snapshots(self.highest_snapshots)); - - self.run_segment::(targets.receipts.clone().map(|(range, _)| range))?; - - self.run_segment::( - targets.transactions.clone().map(|(range, _)| range), - )?; - - self.run_segment::(targets.headers.clone())?; - - self.update_highest_snapshots_tracker()?; - - Ok(targets) - } - - /// Run the snapshotter for one segment. - /// - /// It first builds the snapshot in a **temporary directory** inside the snapshots directory. If - /// for some reason the node is terminated during the snapshot process, it will be cleaned - /// up on boot (on [`Snapshotter::new`]) and the snapshot process restarted from scratch for - /// this block range and segment. - /// - /// If it succeeds, then we move the snapshot file from the temporary directory to its main one. - fn run_segment( - &self, - block_range: Option>, - ) -> RethResult<()> { - if let Some(block_range) = block_range { - let temp = self.snapshots_path.join(TEMPORARY_SUBDIRECTORY); - let provider = self.provider_factory.provider()?; - let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; - let segment = S::default(); - let filename = segment.segment().filename(&block_range, &tx_range); - - segment.snapshot::(&provider, temp.clone(), block_range)?; - - reth_primitives::fs::rename(temp.join(&filename), self.snapshots_path.join(filename))?; - } - Ok(()) - } - - /// Returns a snapshot targets at the provided finalized block number, respecting the block - /// interval. The target is determined by the check against last snapshots. 
- pub fn get_snapshot_targets( - &self, - finalized_block_number: BlockNumber, - ) -> RethResult { - let provider = self.provider_factory.provider()?; - - // Round down `finalized_block_number` to a multiple of `block_interval` - let to_block_number = finalized_block_number.saturating_sub( - // Adjust for 0-indexed block numbers - (finalized_block_number + 1) % self.block_interval, - ); - - // Calculate block ranges to snapshot - let headers_block_range = - self.get_snapshot_target_block_range(to_block_number, self.highest_snapshots.headers); - let receipts_block_range = - self.get_snapshot_target_block_range(to_block_number, self.highest_snapshots.receipts); - let transactions_block_range = self - .get_snapshot_target_block_range(to_block_number, self.highest_snapshots.transactions); - - // Calculate transaction ranges to snapshot - let mut block_to_tx_number_cache = HashMap::default(); - let receipts_tx_range = self.get_snapshot_target_tx_range( - &provider, - &mut block_to_tx_number_cache, - self.highest_snapshots.receipts, - &receipts_block_range, - )?; - let transactions_tx_range = self.get_snapshot_target_tx_range( - &provider, - &mut block_to_tx_number_cache, - self.highest_snapshots.transactions, - &transactions_block_range, - )?; - - Ok(SnapshotTargets { - headers: headers_block_range - .size_hint() - .1 - .expect("finalized block should be >= last headers snapshot") - .ge(&(self.block_interval as usize)) - .then_some(headers_block_range), - receipts: receipts_block_range - .size_hint() - .1 - .expect("finalized block should be >= last receipts snapshot") - .ge(&(self.block_interval as usize)) - .then_some((receipts_block_range, receipts_tx_range)), - transactions: transactions_block_range - .size_hint() - .1 - .expect("finalized block should be >= last transactions snapshot") - .ge(&(self.block_interval as usize)) - .then_some((transactions_block_range, transactions_tx_range)), - }) - } - - fn get_snapshot_target_block_range( - &self, - to_block_number: BlockNumber, - highest_snapshot: Option, - ) -> RangeInclusive { - let highest_snapshot = highest_snapshot.map_or(0, |block_number| block_number + 1); - highest_snapshot..=(highest_snapshot + self.block_interval - 1).min(to_block_number) - } - - fn get_snapshot_target_tx_range( - &self, - provider: &DatabaseProviderRO, - block_to_tx_number_cache: &mut HashMap, - highest_snapshot: Option, - block_range: &RangeInclusive, - ) -> RethResult> { - let from_tx_number = if let Some(block_number) = highest_snapshot { - *block_to_tx_number_cache.entry(block_number).or_insert( - provider - .block_body_indices(block_number)? - .ok_or(RethError::Custom( - "Block body indices for highest snapshot not found".to_string(), - ))? - .next_tx_num(), - ) - } else { - 0 - }; - - let to_tx_number = *block_to_tx_number_cache.entry(*block_range.end()).or_insert( - provider - .block_body_indices(*block_range.end())? - .ok_or(RethError::Custom( - "Block body indices for block range end not found".to_string(), - ))? 
- .last_tx_num(), - ); - Ok(from_tx_number..=to_tx_number) - } -} - -#[cfg(test)] -mod tests { - use crate::{snapshotter::SnapshotTargets, Snapshotter}; - use assert_matches::assert_matches; - use reth_interfaces::{ - test_utils::{generators, generators::random_block_range}, - RethError, - }; - use reth_primitives::{snapshot::HighestSnapshots, B256}; - use reth_stages::test_utils::TestStageDB; - - #[test] - fn new() { - let db = TestStageDB::default(); - let snapshots_dir = tempfile::TempDir::new().unwrap(); - let snapshotter = Snapshotter::new(db.factory, snapshots_dir.into_path(), 2).unwrap(); - - assert_eq!( - *snapshotter.highest_snapshot_receiver().borrow(), - Some(HighestSnapshots::default()) - ); - } - - #[test] - fn get_snapshot_targets() { - let db = TestStageDB::default(); - let snapshots_dir = tempfile::TempDir::new().unwrap(); - let mut rng = generators::rng(); - - let blocks = random_block_range(&mut rng, 0..=3, B256::ZERO, 2..3); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); - - let mut snapshotter = Snapshotter::new(db.factory, snapshots_dir.into_path(), 2).unwrap(); - - // Snapshot targets has data per part up to the passed finalized block number, - // respecting the block interval - let targets = snapshotter.get_snapshot_targets(1).expect("get snapshot targets"); - assert_eq!( - targets, - SnapshotTargets { - headers: Some(0..=1), - receipts: Some((0..=1, 0..=3)), - transactions: Some((0..=1, 0..=3)) - } - ); - assert!(targets.is_multiple_of_block_interval(snapshotter.block_interval)); - assert!(targets.is_contiguous_to_highest_snapshots(snapshotter.highest_snapshots)); - // Imitate snapshotter run according to the targets which updates the last snapshots state - snapshotter.set_highest_snapshots_from_targets(&targets); - - // Nothing to snapshot, last snapshots state of snapshotter doesn't pass the thresholds - assert_eq!( - snapshotter.get_snapshot_targets(2), - Ok(SnapshotTargets { headers: None, receipts: None, transactions: None }) - ); - - // Snapshot targets has data per part up to the passed finalized block number, - // respecting the block interval - let targets = snapshotter.get_snapshot_targets(5).expect("get snapshot targets"); - assert_eq!( - targets, - SnapshotTargets { - headers: Some(2..=3), - receipts: Some((2..=3, 4..=7)), - transactions: Some((2..=3, 4..=7)) - } - ); - assert!(targets.is_multiple_of_block_interval(snapshotter.block_interval)); - assert!(targets.is_contiguous_to_highest_snapshots(snapshotter.highest_snapshots)); - // Imitate snapshotter run according to the targets which updates the last snapshots state - snapshotter.set_highest_snapshots_from_targets(&targets); - - // Block body indices not found - assert_matches!(snapshotter.get_snapshot_targets(5), Err(RethError::Custom(_))); - } -} diff --git a/crates/stages/Cargo.toml b/crates/stages/Cargo.toml index 337d582e22d7..fe6a109dacee 100644 --- a/crates/stages/Cargo.toml +++ b/crates/stages/Cargo.toml @@ -26,6 +26,8 @@ reth-codecs.workspace = true reth-provider.workspace = true reth-trie.workspace = true reth-tokio-util.workspace = true +reth-etl.workspace = true +reth-static-file.workspace = true # revm revm.workspace = true @@ -41,6 +43,7 @@ tracing.workspace = true # io serde.workspace = true +tempfile.workspace = true # metrics reth-metrics.workspace = true @@ -66,6 +69,7 @@ reth-node-optimism.workspace = true reth-blockchain-tree.workspace = true reth-revm.workspace = true reth-trie = { workspace = true, features = ["test-utils"] } +reth-provider = { workspace = 
true, features = ["test-utils"] } alloy-rlp.workspace = true itertools.workspace = true @@ -82,7 +86,7 @@ criterion = { workspace = true, features = ["async_futures"] } serde_json.workspace = true [features] -test-utils = ["reth-interfaces/test-utils", "reth-db/test-utils"] +test-utils = ["reth-interfaces/test-utils", "reth-db/test-utils", "reth-provider/test-utils"] [[bench]] name = "criterion" diff --git a/crates/stages/benches/criterion.rs b/crates/stages/benches/criterion.rs index e9354503d279..eb668ab74f9b 100644 --- a/crates/stages/benches/criterion.rs +++ b/crates/stages/benches/criterion.rs @@ -5,14 +5,14 @@ use criterion::{ }; use pprof::criterion::{Output, PProfProfiler}; use reth_db::{test_utils::TempDatabase, DatabaseEnv}; -use reth_interfaces::test_utils::TestConsensus; -use reth_primitives::stage::StageCheckpoint; + +use reth_primitives::{stage::StageCheckpoint, BlockNumber}; use reth_stages::{ - stages::{MerkleStage, SenderRecoveryStage, TotalDifficultyStage, TransactionLookupStage}, + stages::{MerkleStage, SenderRecoveryStage, TransactionLookupStage}, test_utils::TestStageDB, ExecInput, Stage, StageExt, UnwindInput, }; -use std::{path::PathBuf, sync::Arc}; +use std::{ops::RangeInclusive, sync::Arc}; mod setup; use setup::StageRange; @@ -20,7 +20,7 @@ use setup::StageRange; criterion_group! { name = benches; config = Criterion::default().with_profiler(PProfProfiler::new(1000, Output::Flamegraph(None))); - targets = transaction_lookup, account_hashing, senders, total_difficulty, merkle + targets = transaction_lookup, account_hashing, senders, merkle } criterion_main!(benches); @@ -33,16 +33,9 @@ fn account_hashing(c: &mut Criterion) { group.sample_size(10); let num_blocks = 10_000; - let (path, stage, execution_range) = setup::prepare_account_hashing(num_blocks); + let (db, stage, range) = setup::prepare_account_hashing(num_blocks); - measure_stage_with_path( - path, - &mut group, - setup::stage_unwind, - stage, - execution_range, - "AccountHashing".to_string(), - ); + measure_stage(&mut group, &db, setup::stage_unwind, stage, range, "AccountHashing".to_string()); } fn senders(c: &mut Criterion) { @@ -50,11 +43,13 @@ fn senders(c: &mut Criterion) { // don't need to run each stage for that many times group.sample_size(10); + let db = setup::txs_testdata(DEFAULT_NUM_BLOCKS); + for batch in [1000usize, 10_000, 100_000, 250_000] { let stage = SenderRecoveryStage { commit_threshold: DEFAULT_NUM_BLOCKS }; let label = format!("SendersRecovery-batch-{batch}"); - measure_stage(&mut group, setup::stage_unwind, stage, 0..DEFAULT_NUM_BLOCKS, label); + measure_stage(&mut group, &db, setup::stage_unwind, stage, 0..=DEFAULT_NUM_BLOCKS, label); } } @@ -64,29 +59,15 @@ fn transaction_lookup(c: &mut Criterion) { group.sample_size(10); let stage = TransactionLookupStage::new(DEFAULT_NUM_BLOCKS, None); - measure_stage( - &mut group, - setup::stage_unwind, - stage, - 0..DEFAULT_NUM_BLOCKS, - "TransactionLookup".to_string(), - ); -} - -fn total_difficulty(c: &mut Criterion) { - let mut group = c.benchmark_group("Stages"); - group.measurement_time(std::time::Duration::from_millis(2000)); - group.warm_up_time(std::time::Duration::from_millis(2000)); - // don't need to run each stage for that many times - group.sample_size(10); - let stage = TotalDifficultyStage::new(Arc::new(TestConsensus::default())); + let db = setup::txs_testdata(DEFAULT_NUM_BLOCKS); measure_stage( &mut group, + &db, setup::stage_unwind, stage, - 0..DEFAULT_NUM_BLOCKS, - "TotalDifficulty".to_string(), + 0..=DEFAULT_NUM_BLOCKS, + 
"TransactionLookup".to_string(), ); } @@ -95,44 +76,58 @@ fn merkle(c: &mut Criterion) { // don't need to run each stage for that many times group.sample_size(10); + let db = setup::txs_testdata(DEFAULT_NUM_BLOCKS); + let stage = MerkleStage::Both { clean_threshold: u64::MAX }; measure_stage( &mut group, + &db, setup::unwind_hashes, stage, - 1..DEFAULT_NUM_BLOCKS, + 1..=DEFAULT_NUM_BLOCKS, "Merkle-incremental".to_string(), ); let stage = MerkleStage::Both { clean_threshold: 0 }; measure_stage( &mut group, + &db, setup::unwind_hashes, stage, - 1..DEFAULT_NUM_BLOCKS, + 1..=DEFAULT_NUM_BLOCKS, "Merkle-fullhash".to_string(), ); } -fn measure_stage_with_path( - path: PathBuf, +fn measure_stage( group: &mut BenchmarkGroup<'_, WallTime>, + db: &TestStageDB, setup: F, stage: S, - stage_range: StageRange, + block_interval: RangeInclusive, label: String, ) where S: Clone + Stage>>, F: Fn(S, &TestStageDB, StageRange), { - let db = TestStageDB::new(&path); + let stage_range = ( + ExecInput { + target: Some(*block_interval.end()), + checkpoint: Some(StageCheckpoint::new(*block_interval.start())), + }, + UnwindInput { + checkpoint: StageCheckpoint::new(*block_interval.end()), + unwind_to: *block_interval.start(), + bad_block: None, + }, + ); let (input, _) = stage_range; group.bench_function(label, move |b| { b.to_async(FuturesExecutor).iter_with_setup( || { // criterion setup does not support async, so we have to use our own runtime - setup(stage.clone(), &db, stage_range) + setup(stage.clone(), db, stage_range) }, |_| async { let mut stage = stage.clone(); @@ -147,35 +142,3 @@ fn measure_stage_with_path( ) }); } - -fn measure_stage( - group: &mut BenchmarkGroup<'_, WallTime>, - setup: F, - stage: S, - block_interval: std::ops::Range, - label: String, -) where - S: Clone + Stage>>, - F: Fn(S, &TestStageDB, StageRange), -{ - let path = setup::txs_testdata(block_interval.end); - - measure_stage_with_path( - path, - group, - setup, - stage, - ( - ExecInput { - target: Some(block_interval.end), - checkpoint: Some(StageCheckpoint::new(block_interval.start)), - }, - UnwindInput { - checkpoint: StageCheckpoint::new(block_interval.end), - unwind_to: block_interval.start, - bad_block: None, - }, - ), - label, - ); -} diff --git a/crates/stages/benches/setup/account_hashing.rs b/crates/stages/benches/setup/account_hashing.rs index 497dce2787f4..d300265355e7 100644 --- a/crates/stages/benches/setup/account_hashing.rs +++ b/crates/stages/benches/setup/account_hashing.rs @@ -1,15 +1,15 @@ #![allow(unreachable_pub)] -use super::{constants, StageRange}; + +use super::constants; use reth_db::{ cursor::DbCursorRO, database::Database, tables, transaction::DbTx, DatabaseError as DbError, }; -use reth_primitives::{fs, stage::StageCheckpoint}; +use reth_primitives::{fs, stage::StageCheckpoint, BlockNumber}; use reth_stages::{ stages::{AccountHashingStage, SeedOpts}, test_utils::TestStageDB, - ExecInput, UnwindInput, }; -use std::path::{Path, PathBuf}; +use std::{ops::RangeInclusive, path::Path}; /// Prepares a database for [`AccountHashingStage`] /// If the environment variable [`constants::ACCOUNT_HASHING_DB`] is set, it will use that one and @@ -17,20 +17,22 @@ use std::path::{Path, PathBuf}; /// generate its own random data. /// /// Returns the path to the database file, stage and range of stage execution if it exists. 
-pub fn prepare_account_hashing(num_blocks: u64) -> (PathBuf, AccountHashingStage, StageRange) { - let (path, stage_range) = match std::env::var(constants::ACCOUNT_HASHING_DB) { +pub fn prepare_account_hashing( + num_blocks: u64, +) -> (TestStageDB, AccountHashingStage, RangeInclusive) { + let (db, stage_range) = match std::env::var(constants::ACCOUNT_HASHING_DB) { Ok(db) => { let path = Path::new(&db).to_path_buf(); let range = find_stage_range(&path); - (path, range) + (TestStageDB::new(&path), range) } Err(_) => generate_testdata_db(num_blocks), }; - (path, AccountHashingStage::default(), stage_range) + (db, AccountHashingStage::default(), stage_range) } -fn find_stage_range(db: &Path) -> StageRange { +fn find_stage_range(db: &Path) -> RangeInclusive { let mut stage_range = None; TestStageDB::new(db) .factory @@ -40,13 +42,7 @@ fn find_stage_range(db: &Path) -> StageRange { let from = cursor.first()?.unwrap().0; let to = StageCheckpoint::new(cursor.last()?.unwrap().0); - stage_range = Some(( - ExecInput { - target: Some(to.block_number), - checkpoint: Some(StageCheckpoint::new(from)), - }, - UnwindInput { unwind_to: from, checkpoint: to, bad_block: None }, - )); + stage_range = Some(from..=to.block_number); Ok::<(), DbError>(()) }) .unwrap() @@ -55,19 +51,21 @@ fn find_stage_range(db: &Path) -> StageRange { stage_range.expect("Could not find the stage range from the external DB.") } -fn generate_testdata_db(num_blocks: u64) -> (PathBuf, StageRange) { - let opts = SeedOpts { blocks: 0..=num_blocks, accounts: 0..100_000, txs: 100..150 }; +fn generate_testdata_db(num_blocks: u64) -> (TestStageDB, RangeInclusive) { + let opts = SeedOpts { blocks: 0..=num_blocks, accounts: 100_000, txs: 100..150 }; let path = Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata").join("account-hashing-bench"); + let exists = path.exists(); + let db = TestStageDB::new(&path); - if !path.exists() { + if !exists { // create the dirs fs::create_dir_all(&path).unwrap(); println!("Account Hashing testdata not found, generating to {:?}", path.display()); - let db = TestStageDB::new(&path); let provider = db.factory.provider_rw().unwrap(); - let _accounts = AccountHashingStage::seed(&provider, opts); + let _accounts = AccountHashingStage::seed(&provider, opts.clone()); provider.commit().expect("failed to commit"); } - (path, (ExecInput { target: Some(num_blocks), ..Default::default() }, UnwindInput::default())) + + (db, opts.blocks) } diff --git a/crates/stages/benches/setup/mod.rs b/crates/stages/benches/setup/mod.rs index f6322cc50d3f..b63ab63cb633 100644 --- a/crates/stages/benches/setup/mod.rs +++ b/crates/stages/benches/setup/mod.rs @@ -11,21 +11,17 @@ use reth_interfaces::test_utils::{ generators, generators::{ random_block_range, random_changeset_range, random_contract_account_range, - random_eoa_account_range, + random_eoa_accounts, }, }; use reth_primitives::{fs, Account, Address, SealedBlock, B256, U256}; use reth_stages::{ stages::{AccountHashingStage, StorageHashingStage}, - test_utils::TestStageDB, + test_utils::{StorageKind, TestStageDB}, ExecInput, Stage, UnwindInput, }; use reth_trie::StateRoot; -use std::{ - collections::BTreeMap, - path::{Path, PathBuf}, - sync::Arc, -}; +use std::{collections::BTreeMap, path::Path, sync::Arc}; mod constants; @@ -84,8 +80,7 @@ pub(crate) fn unwind_hashes>>>( // Helper for generating testdata for the benchmarks. // Returns the path to the database file. 
-pub(crate) fn txs_testdata(num_blocks: u64) -> PathBuf { - let path = Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata").join("txs-bench"); +pub(crate) fn txs_testdata(num_blocks: u64) -> TestStageDB { let txs_range = 100..150; // number of storage changes per transition @@ -101,14 +96,17 @@ pub(crate) fn txs_testdata(num_blocks: u64) -> PathBuf { // rng let mut rng = generators::rng(); - if !path.exists() { + let path = Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata").join("txs-bench"); + let exists = path.exists(); + let db = TestStageDB::new(&path); + + if !exists { // create the dirs fs::create_dir_all(&path).unwrap(); println!("Transactions testdata not found, generating to {:?}", path.display()); - let db = TestStageDB::new(&path); let accounts: BTreeMap = concat([ - random_eoa_account_range(&mut rng, 0..n_eoa), + random_eoa_accounts(&mut rng, n_eoa), random_contract_account_range(&mut rng, &mut (0..n_contract)), ]) .into_iter() @@ -167,15 +165,15 @@ pub(crate) fn txs_testdata(num_blocks: u64) -> PathBuf { updated_header.state_root = root; *last_block = SealedBlock { header: updated_header.seal_slow(), ..cloned_last }; - db.insert_blocks(blocks.iter(), None).unwrap(); + db.insert_blocks(blocks.iter(), StorageKind::Static).unwrap(); // initialize TD db.commit(|tx| { let (head, _) = tx.cursor_read::()?.first()?.unwrap_or_default(); - Ok(tx.put::(head, U256::from(0).into())?) + Ok(tx.put::(head, U256::from(0).into())?) }) .unwrap(); } - path + db } diff --git a/crates/stages/src/error.rs b/crates/stages/src/error.rs index 170f592ec833..e8a5e3a71ff7 100644 --- a/crates/stages/src/error.rs +++ b/crates/stages/src/error.rs @@ -3,7 +3,7 @@ use reth_interfaces::{ consensus, db::DatabaseError as DbError, executor, p2p::error::DownloadError, provider::ProviderError, RethError, }; -use reth_primitives::SealedHeader; +use reth_primitives::{BlockNumber, SealedHeader, StaticFileSegment, TxNumber}; use thiserror::Error; use tokio::sync::mpsc::error::SendError; @@ -76,6 +76,36 @@ pub enum StageError { /// rely on external downloaders #[error("invalid download response: {0}")] Download(#[from] DownloadError), + /// Database is ahead of static file data. + #[error("missing static file data for block number: {number}", number = block.number)] + MissingStaticFileData { + /// Starting block with missing data. + block: Box, + /// Static File segment + segment: StaticFileSegment, + }, + /// Unrecoverable inconsistency error related to a transaction number in a static file segment. + #[error( + "inconsistent transaction number for {segment}. db: {database}, static_file: {static_file}" + )] + InconsistentTxNumber { + /// Static File segment where this error was encountered. + segment: StaticFileSegment, + /// Expected database transaction number. + database: TxNumber, + /// Expected static file transaction number. + static_file: TxNumber, + }, + /// Unrecoverable inconsistency error related to a block number in a static file segment. + #[error("inconsistent block number for {segment}. db: {database}, static_file: {static_file}")] + InconsistentBlockNumber { + /// Static File segment where this error was encountered. + segment: StaticFileSegment, + /// Expected database block number. + database: BlockNumber, + /// Expected static file block number. 
+ static_file: BlockNumber, + }, /// Internal error #[error(transparent)] Internal(#[from] RethError), @@ -104,11 +134,20 @@ impl StageError { StageError::MissingDownloadBuffer | StageError::MissingSyncGap | StageError::ChannelClosed | + StageError::InconsistentBlockNumber { .. } | + StageError::InconsistentTxNumber { .. } | + StageError::Internal(_) | StageError::Fatal(_) ) } } +impl From for StageError { + fn from(source: std::io::Error) -> Self { + StageError::Fatal(Box::new(source)) + } +} + /// A pipeline execution error. #[derive(Error, Debug)] pub enum PipelineError { @@ -124,7 +163,7 @@ pub enum PipelineError { /// The pipeline encountered an error while trying to send an event. #[error("pipeline encountered an error while trying to send an event")] Channel(#[from] Box>), - /// The stage encountered an internal error. + /// Internal error #[error(transparent)] - Internal(Box), + Internal(#[from] RethError), } diff --git a/crates/stages/src/lib.rs b/crates/stages/src/lib.rs index 0ad60c6219c4..c2bceceee2bb 100644 --- a/crates/stages/src/lib.rs +++ b/crates/stages/src/lib.rs @@ -18,7 +18,7 @@ //! # use reth_interfaces::consensus::Consensus; //! # use reth_interfaces::test_utils::{TestBodiesClient, TestConsensus, TestHeadersClient}; //! # use reth_revm::EvmProcessorFactory; -//! # use reth_primitives::{PeerId, MAINNET, B256}; +//! # use reth_primitives::{PeerId, MAINNET, B256, PruneModes}; //! # use reth_stages::Pipeline; //! # use reth_stages::sets::DefaultStages; //! # use tokio::sync::watch; @@ -26,6 +26,7 @@ //! # use reth_provider::ProviderFactory; //! # use reth_provider::HeaderSyncMode; //! # use reth_provider::test_utils::create_test_provider_factory; +//! # use reth_static_file::StaticFileProducer; //! # //! # let chain_spec = MAINNET.clone(); //! # let consensus: Arc = Arc::new(TestConsensus::default()); @@ -41,19 +42,27 @@ //! # ); //! # let (tip_tx, tip_rx) = watch::channel(B256::default()); //! # let executor_factory = EvmProcessorFactory::new(chain_spec.clone(), EthEvmConfig::default()); +//! # let static_file_producer = StaticFileProducer::new( +//! # provider_factory.clone(), +//! # provider_factory.static_file_provider(), +//! # PruneModes::default() +//! # ); //! // Create a pipeline that can fully sync //! # let pipeline = //! Pipeline::builder() //! .with_tip_sender(tip_tx) -//! .add_stages(DefaultStages::new( -//! provider_factory.clone(), -//! HeaderSyncMode::Tip(tip_rx), -//! consensus, -//! headers_downloader, -//! bodies_downloader, -//! executor_factory, -//! )) -//! .build(provider_factory); +//! .add_stages( +//! DefaultStages::new( +//! provider_factory.clone(), +//! HeaderSyncMode::Tip(tip_rx), +//! consensus, +//! headers_downloader, +//! bodies_downloader, +//! executor_factory, +//! ) +//! .unwrap(), +//! ) +//! .build(provider_factory, static_file_producer); //! ``` //! //! ## Feature Flags diff --git a/crates/stages/src/pipeline/builder.rs b/crates/stages/src/pipeline/builder.rs index 3e160577fddc..e76f76c604c8 100644 --- a/crates/stages/src/pipeline/builder.rs +++ b/crates/stages/src/pipeline/builder.rs @@ -2,6 +2,7 @@ use crate::{pipeline::BoxedStage, MetricEventsSender, Pipeline, Stage, StageSet} use reth_db::database::Database; use reth_primitives::{stage::StageId, BlockNumber, B256}; use reth_provider::ProviderFactory; +use reth_static_file::StaticFileProducer; use tokio::sync::watch; /// Builds a [`Pipeline`]. @@ -67,12 +68,17 @@ where } /// Builds the final [`Pipeline`] using the given database. 
- pub fn build(self, provider_factory: ProviderFactory) -> Pipeline { + pub fn build( + self, + provider_factory: ProviderFactory, + static_file_producer: StaticFileProducer, + ) -> Pipeline { let Self { stages, max_block, tip_tx, metrics_tx } = self; Pipeline { provider_factory, stages, max_block, + static_file_producer, tip_tx, listeners: Default::default(), progress: Default::default(), diff --git a/crates/stages/src/pipeline/mod.rs b/crates/stages/src/pipeline/mod.rs index 5ef0a3f6a995..40d010f48608 100644 --- a/crates/stages/src/pipeline/mod.rs +++ b/crates/stages/src/pipeline/mod.rs @@ -3,12 +3,17 @@ use crate::{ }; use futures_util::Future; use reth_db::database::Database; +use reth_interfaces::RethResult; use reth_primitives::{ constants::BEACON_CONSENSUS_REORG_UNWIND_DEPTH, stage::{StageCheckpoint, StageId}, + static_file::HighestStaticFiles, BlockNumber, B256, }; -use reth_provider::{ProviderFactory, StageCheckpointReader, StageCheckpointWriter}; +use reth_provider::{ + providers::StaticFileWriter, ProviderFactory, StageCheckpointReader, StageCheckpointWriter, +}; +use reth_static_file::StaticFileProducer; use reth_tokio_util::EventListeners; use std::pin::Pin; use tokio::sync::watch; @@ -66,6 +71,7 @@ pub struct Pipeline { stages: Vec>, /// The maximum block number to sync to. max_block: Option, + static_file_producer: StaticFileProducer, /// All listeners for events the pipeline emits. listeners: EventListeners, /// Keeps track of the progress of the pipeline. @@ -177,6 +183,8 @@ where /// pipeline (for example the `Finish` stage). Or [ControlFlow::Unwind] of the stage that caused /// the unwind. pub async fn run_loop(&mut self) -> Result { + self.produce_static_files()?; + let mut previous_stage = None; for stage_index in 0..self.stages.len() { let stage = &self.stages[stage_index]; @@ -212,6 +220,33 @@ where Ok(self.progress.next_ctrl()) } + /// Run [static file producer](StaticFileProducer) and move all data from the database to static + /// files for corresponding [segments](reth_primitives::static_file::StaticFileSegment), + /// according to their [stage checkpoints](StageCheckpoint): + /// - [StaticFileSegment::Headers](reth_primitives::static_file::StaticFileSegment::Headers) -> + /// [StageId::Headers] + /// - [StaticFileSegment::Receipts](reth_primitives::static_file::StaticFileSegment::Receipts) + /// -> [StageId::Execution] + /// - [StaticFileSegment::Transactions](reth_primitives::static_file::StaticFileSegment::Transactions) + /// -> [StageId::Bodies] + fn produce_static_files(&mut self) -> RethResult<()> { + let provider = self.provider_factory.provider()?; + let targets = self.static_file_producer.get_static_file_targets(HighestStaticFiles { + headers: provider + .get_stage_checkpoint(StageId::Headers)? + .map(|checkpoint| checkpoint.block_number), + receipts: provider + .get_stage_checkpoint(StageId::Execution)? + .map(|checkpoint| checkpoint.block_number), + transactions: provider + .get_stage_checkpoint(StageId::Bodies)? + .map(|checkpoint| checkpoint.block_number), + })?; + self.static_file_producer.run(targets)?; + + Ok(()) + } + /// Unwind the stages to the target block. /// /// If the unwind is due to a bad block the number of that block should be specified. 
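As an aside, the checkpoint-to-segment mapping used by `produce_static_files` above can also be driven outside the pipeline; the following is an illustrative sketch only (not part of this diff), using the `StaticFileProducer`, `HighestStaticFiles`, and stage-checkpoint APIs shown elsewhere in this change, with a hypothetical helper name.
// Illustrative sketch only (not part of this change): run the static file
// producer once, with the same checkpoint-to-segment mapping as the pipeline.
use reth_db::database::Database;
use reth_interfaces::RethResult;
use reth_primitives::{stage::StageId, static_file::HighestStaticFiles, PruneModes};
use reth_provider::{ProviderFactory, StageCheckpointReader};
use reth_static_file::StaticFileProducer;

fn run_static_file_producer_once<DB: Database>(
    provider_factory: ProviderFactory<DB>,
) -> RethResult<()> {
    let mut static_file_producer = StaticFileProducer::new(
        provider_factory.clone(),
        provider_factory.static_file_provider(),
        PruneModes::default(),
    );

    let provider = provider_factory.provider()?;
    // Each segment is capped by the checkpoint of the stage that writes it:
    // Headers -> Headers stage, Transactions -> Bodies stage, Receipts -> Execution stage.
    let targets = static_file_producer.get_static_file_targets(HighestStaticFiles {
        headers: provider
            .get_stage_checkpoint(StageId::Headers)?
            .map(|checkpoint| checkpoint.block_number),
        receipts: provider
            .get_stage_checkpoint(StageId::Execution)?
            .map(|checkpoint| checkpoint.block_number),
        transactions: provider
            .get_stage_checkpoint(StageId::Bodies)?
            .map(|checkpoint| checkpoint.block_number),
    })?;

    static_file_producer.run(targets)?;
    Ok(())
}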
@@ -279,7 +314,9 @@ where self.listeners .notify(PipelineEvent::Unwound { stage_id, result: unwind_output }); + self.provider_factory.static_file_provider().commit()?; provider_rw.commit()?; + provider_rw = self.provider_factory.provider_rw()?; } Err(err) => { @@ -371,6 +408,7 @@ where result: out.clone(), }); + self.provider_factory.static_file_provider().commit()?; provider_rw.commit()?; if done { @@ -428,6 +466,7 @@ fn on_stage_error( StageId::MerkleExecute, prev_checkpoint.unwrap_or_default(), )?; + factory.static_file_provider().commit()?; provider_rw.commit()?; // We unwind because of a validation error. If the unwind itself @@ -457,6 +496,16 @@ fn on_stage_error( })) } } + } else if let StageError::MissingStaticFileData { block, segment } = err { + error!( + target: "sync::pipeline", + stage = %stage_id, + bad_block = %block.number, + segment = %segment, + "Stage is missing static file data." + ); + + Ok(Some(ControlFlow::Unwind { target: block.number - 1, bad_block: block })) } else if err.is_fatal() { error!(target: "sync::pipeline", stage = %stage_id, "Stage encountered a fatal error: {err}"); Err(err.into()) @@ -492,6 +541,7 @@ mod tests { provider::ProviderError, test_utils::{generators, generators::random_header}, }; + use reth_primitives::PruneModes; use reth_provider::test_utils::create_test_provider_factory; use tokio_stream::StreamExt; @@ -537,7 +587,14 @@ mod tests { .add_exec(Ok(ExecOutput { checkpoint: StageCheckpoint::new(10), done: true })), ) .with_max_block(10) - .build(provider_factory); + .build( + provider_factory.clone(), + StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ), + ); let events = pipeline.events(); // Run pipeline @@ -597,7 +654,14 @@ mod tests { .add_unwind(Ok(UnwindOutput { checkpoint: StageCheckpoint::new(1) })), ) .with_max_block(10) - .build(provider_factory); + .build( + provider_factory.clone(), + StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ), + ); let events = pipeline.events(); // Run pipeline @@ -704,7 +768,14 @@ mod tests { .add_exec(Ok(ExecOutput { checkpoint: StageCheckpoint::new(10), done: true })), ) .with_max_block(10) - .build(provider_factory); + .build( + provider_factory.clone(), + StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ), + ); let events = pipeline.events(); // Run pipeline @@ -801,7 +872,14 @@ mod tests { .add_exec(Ok(ExecOutput { checkpoint: StageCheckpoint::new(10), done: true })), ) .with_max_block(10) - .build(provider_factory); + .build( + provider_factory.clone(), + StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ), + ); let events = pipeline.events(); // Run pipeline @@ -881,7 +959,14 @@ mod tests { .add_exec(Ok(ExecOutput { checkpoint: StageCheckpoint::new(10), done: true })), ) .with_max_block(10) - .build(provider_factory); + .build( + provider_factory.clone(), + StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ), + ); let result = pipeline.run().await; assert_matches!(result, Ok(())); @@ -891,7 +976,14 @@ mod tests { .add_stage(TestStage::new(StageId::Other("Fatal")).add_exec(Err( StageError::DatabaseIntegrity(ProviderError::BlockBodyIndicesNotFound(5)), ))) - .build(provider_factory); + .build( + provider_factory.clone(), + 
StaticFileProducer::new( + provider_factory.clone(), + provider_factory.static_file_provider(), + PruneModes::default(), + ), + ); let result = pipeline.run().await; assert_matches!( result, diff --git a/crates/stages/src/sets.rs b/crates/stages/src/sets.rs index 7879c20d86b0..1b029b11e534 100644 --- a/crates/stages/src/sets.rs +++ b/crates/stages/src/sets.rs @@ -13,14 +13,22 @@ //! # use reth_stages::Pipeline; //! # use reth_stages::sets::{OfflineStages}; //! # use reth_revm::EvmProcessorFactory; -//! # use reth_primitives::MAINNET; +//! # use reth_primitives::{PruneModes, MAINNET}; //! # use reth_node_ethereum::EthEvmConfig; //! # use reth_provider::test_utils::create_test_provider_factory; +//! # use reth_static_file::StaticFileProducer; //! //! # let executor_factory = EvmProcessorFactory::new(MAINNET.clone(), EthEvmConfig::default()); //! # let provider_factory = create_test_provider_factory(); +//! # let static_file_producer = StaticFileProducer::new( +//! provider_factory.clone(), +//! provider_factory.static_file_provider(), +//! PruneModes::default(), +//! ); //! // Build a pipeline with all offline stages. -//! # let pipeline = Pipeline::builder().add_stages(OfflineStages::new(executor_factory)).build(provider_factory); +//! # let pipeline = Pipeline::builder() +//! .add_stages(OfflineStages::new(executor_factory)) +//! .build(provider_factory, static_file_producer); //! ``` //! //! ```ignore @@ -42,9 +50,9 @@ use crate::{ stages::{ AccountHashingStage, BodyStage, ExecutionStage, FinishStage, HeaderStage, IndexAccountHistoryStage, IndexStorageHistoryStage, MerkleStage, SenderRecoveryStage, - StorageHashingStage, TotalDifficultyStage, TransactionLookupStage, + StorageHashingStage, TransactionLookupStage, }, - StageSet, StageSetBuilder, + StageError, StageSet, StageSetBuilder, }; use reth_db::database::Database; use reth_interfaces::{ @@ -53,6 +61,7 @@ use reth_interfaces::{ }; use reth_provider::{ExecutorFactory, HeaderSyncGapProvider, HeaderSyncMode}; use std::sync::Arc; +use tempfile::TempDir; /// A set containing all stages to run a fully syncing instance of reth. /// @@ -64,7 +73,6 @@ use std::sync::Arc; /// /// This expands to the following series of stages: /// - [`HeaderStage`] -/// - [`TotalDifficultyStage`] /// - [`BodyStage`] /// - [`SenderRecoveryStage`] /// - [`ExecutionStage`] @@ -93,20 +101,21 @@ impl DefaultStages { header_downloader: H, body_downloader: B, executor_factory: EF, - ) -> Self + ) -> Result where EF: ExecutorFactory, { - Self { + Ok(Self { online: OnlineStages::new( provider, header_mode, consensus, header_downloader, body_downloader, + Arc::new(TempDir::new()?), ), executor_factory, - } + }) } } @@ -119,17 +128,20 @@ where default_offline: StageSetBuilder, executor_factory: EF, ) -> StageSetBuilder { - default_offline.add_set(OfflineStages::new(executor_factory)).add_stage(FinishStage) + StageSetBuilder::default() + .add_set(default_offline) + .add_set(OfflineStages::new(executor_factory)) + .add_stage(FinishStage) } } -impl StageSet for DefaultStages +impl StageSet for DefaultStages where - DB: Database, Provider: HeaderSyncGapProvider + 'static, H: HeaderDownloader + 'static, B: BodyDownloader + 'static, EF: ExecutorFactory, + DB: Database + 'static, { fn builder(self) -> StageSetBuilder { Self::add_offline_stages(self.online.builder(), self.executor_factory) @@ -152,6 +164,8 @@ pub struct OnlineStages { header_downloader: H, /// The block body downloader body_downloader: B, + /// Temporary directory for ETL usage on headers stage. 
+ temp_dir: Arc, } impl OnlineStages { @@ -162,8 +176,9 @@ impl OnlineStages { consensus: Arc, header_downloader: H, body_downloader: B, + temp_dir: Arc, ) -> Self { - Self { provider, header_mode, consensus, header_downloader, body_downloader } + Self { provider, header_mode, consensus, header_downloader, body_downloader, temp_dir } } } @@ -177,12 +192,8 @@ where pub fn builder_with_headers( headers: HeaderStage, body_downloader: B, - consensus: Arc, ) -> StageSetBuilder { - StageSetBuilder::default() - .add_stage(headers) - .add_stage(TotalDifficultyStage::new(consensus.clone())) - .add_stage(BodyStage::new(body_downloader)) + StageSetBuilder::default().add_stage(headers).add_stage(BodyStage::new(body_downloader)) } /// Create a new builder using the given bodies stage. @@ -192,10 +203,16 @@ where mode: HeaderSyncMode, header_downloader: H, consensus: Arc, + temp_dir: Arc, ) -> StageSetBuilder { StageSetBuilder::default() - .add_stage(HeaderStage::new(provider, header_downloader, mode)) - .add_stage(TotalDifficultyStage::new(consensus.clone())) + .add_stage(HeaderStage::new( + provider, + header_downloader, + mode, + consensus.clone(), + temp_dir.clone(), + )) .add_stage(bodies) } } @@ -209,8 +226,13 @@ where { fn builder(self) -> StageSetBuilder { StageSetBuilder::default() - .add_stage(HeaderStage::new(self.provider, self.header_downloader, self.header_mode)) - .add_stage(TotalDifficultyStage::new(self.consensus.clone())) + .add_stage(HeaderStage::new( + self.provider, + self.header_downloader, + self.header_mode, + self.consensus.clone(), + self.temp_dir.clone(), + )) .add_stage(BodyStage::new(self.body_downloader)) } } diff --git a/crates/stages/src/stages/bodies.rs b/crates/stages/src/stages/bodies.rs index cf9b4dc64b6d..459eace72074 100644 --- a/crates/stages/src/stages/bodies.rs +++ b/crates/stages/src/stages/bodies.rs @@ -5,13 +5,21 @@ use reth_db::{ database::Database, models::{StoredBlockBodyIndices, StoredBlockOmmers, StoredBlockWithdrawals}, tables, - transaction::{DbTx, DbTxMut}, - DatabaseError, + transaction::DbTxMut, +}; +use reth_interfaces::{ + p2p::bodies::{downloader::BodyDownloader, response::BlockResponse}, + provider::ProviderResult, +}; +use reth_primitives::{ + stage::{EntitiesCheckpoint, StageCheckpoint, StageId}, + StaticFileSegment, +}; +use reth_provider::{providers::StaticFileWriter, DatabaseProviderRW, HeaderProvider, StatsReader}; +use std::{ + cmp::Ordering, + task::{ready, Context, Poll}, }; -use reth_interfaces::p2p::bodies::{downloader::BodyDownloader, response::BlockResponse}; -use reth_primitives::stage::{EntitiesCheckpoint, StageCheckpoint, StageId}; -use reth_provider::DatabaseProviderRW; -use std::task::{ready, Context, Poll}; use tracing::*; // TODO(onbjerg): Metrics and events (gradual status for e.g. 
CLI) @@ -35,7 +43,7 @@ use tracing::*; /// - [`BlockOmmers`][reth_db::tables::BlockOmmers] /// - [`BlockBodies`][reth_db::tables::BlockBodyIndices] /// - [`Transactions`][reth_db::tables::Transactions] -/// - [`TransactionBlock`][reth_db::tables::TransactionBlock] +/// - [`TransactionBlocks`][reth_db::tables::TransactionBlocks] /// /// # Genesis /// @@ -109,13 +117,48 @@ impl Stage for BodyStage { // Cursors used to write bodies, ommers and transactions let tx = provider.tx_ref(); let mut block_indices_cursor = tx.cursor_write::()?; - let mut tx_cursor = tx.cursor_write::()?; - let mut tx_block_cursor = tx.cursor_write::()?; + let mut tx_block_cursor = tx.cursor_write::()?; let mut ommers_cursor = tx.cursor_write::()?; let mut withdrawals_cursor = tx.cursor_write::()?; - // Get id for the next tx_num or zero if there are no transactions. - let mut next_tx_num = tx_cursor.last()?.map(|(id, _)| id + 1).unwrap_or_default(); + // Get id for the next tx_num or zero if there are no transactions. + let mut next_tx_num = tx_block_cursor.last()?.map(|(id, _)| id + 1).unwrap_or_default(); + + let static_file_provider = provider.static_file_provider(); + let mut static_file_producer = + static_file_provider.get_writer(from_block, StaticFileSegment::Transactions)?; + + // Make sure the Transactions static file is at the same height. If it's ahead, a previous + // execution of this input was interrupted and we need to unwind the static file. + let next_static_file_tx_num = static_file_provider + .get_highest_static_file_tx(StaticFileSegment::Transactions) + .map(|id| id + 1) + .unwrap_or_default(); + + match next_static_file_tx_num.cmp(&next_tx_num) { + // If static files are ahead, then we didn't reach the database commit in a previous + // stage run. So, our only solution is to unwind the static files and proceed from the + // database expected height. + Ordering::Greater => static_file_producer + .prune_transactions(next_static_file_tx_num - next_tx_num, from_block - 1)?, + // If static files are behind, then there was some corruption or loss of files. This + // error will trigger an unwind that will bring the database to the same height as the + // static files. + Ordering::Less => { + let last_block = static_file_provider + .get_highest_static_file_block(StaticFileSegment::Transactions) + .unwrap_or_default(); + + let missing_block = + Box::new(provider.sealed_header(last_block + 1)?.unwrap_or_default()); + + return Err(StageError::MissingStaticFileData { + block: missing_block, + segment: StaticFileSegment::Transactions, + }) + } + Ordering::Equal => {} + } debug!(target: "sync::stages::bodies", stage_progress = from_block, target = to_block, start_tx_id = next_tx_num, "Commencing sync"); @@ -133,6 +176,23 @@ impl Stage for BodyStage { BlockResponse::Empty(_) => 0, }, }; + + // Increment block on static file header. + if block_number > 0 { + let appended_block_number = + static_file_producer.increment_block(StaticFileSegment::Transactions)?; + + if appended_block_number != block_number { + // This scenario indicates a critical error in the logic of adding new + // items. It should be treated as an `expect()` failure.
+ return Err(StageError::InconsistentBlockNumber { + segment: StaticFileSegment::Transactions, + database: block_number, + static_file: appended_block_number, + }) + } + } + match response { BlockResponse::Full(block) => { // write transaction block index @@ -142,8 +202,19 @@ impl Stage for BodyStage { // Write transactions for transaction in block.body { - // Append the transaction - tx_cursor.append(next_tx_num, transaction.into())?; + let appended_tx_number = static_file_producer + .append_transaction(next_tx_num, transaction.into())?; + + if appended_tx_number != next_tx_num { + // This scenario indicates a critical error in the logic of adding new + // items. It should be treated as an `expect()` failure. + return Err(StageError::InconsistentTxNumber { + segment: StaticFileSegment::Transactions, + database: next_tx_num, + static_file: appended_tx_number, + }) + } + // Increment transaction id for each transaction. next_tx_num += 1; } @@ -190,14 +261,14 @@ impl Stage for BodyStage { ) -> Result { self.buffer.take(); + let static_file_provider = provider.static_file_provider(); let tx = provider.tx_ref(); // Cursors to unwind bodies, ommers let mut body_cursor = tx.cursor_write::()?; - let mut transaction_cursor = tx.cursor_write::()?; let mut ommers_cursor = tx.cursor_write::()?; let mut withdrawals_cursor = tx.cursor_write::()?; // Cursors to unwind transitions - let mut tx_block_cursor = tx.cursor_write::()?; + let mut tx_block_cursor = tx.cursor_write::()?; let mut rev_walker = body_cursor.walk_back(None)?; while let Some((number, block_meta)) = rev_walker.next().transpose()? { @@ -222,18 +293,41 @@ impl Stage for BodyStage { tx_block_cursor.delete_current()?; } - // Delete all transactions that belong to this block - for tx_id in block_meta.tx_num_range() { - // First delete the transaction - if transaction_cursor.seek_exact(tx_id)?.is_some() { - transaction_cursor.delete_current()?; - } - } - // Delete the current body value rev_walker.delete_current()?; } + let mut static_file_producer = + static_file_provider.latest_writer(StaticFileSegment::Transactions)?; + + // Unwind from static files. Get the current last expected transaction from DB, and match it + // on static file + let db_tx_num = + body_cursor.last()?.map(|(_, block_meta)| block_meta.last_tx_num()).unwrap_or_default(); + let static_file_tx_num: u64 = static_file_provider + .get_highest_static_file_tx(StaticFileSegment::Transactions) + .unwrap_or_default(); + + // If there are more transactions on database, then we are missing static file data and we + // need to unwind further. + if db_tx_num > static_file_tx_num { + let last_block = static_file_provider + .get_highest_static_file_block(StaticFileSegment::Transactions) + .unwrap_or_default(); + + let missing_block = + Box::new(provider.sealed_header(last_block + 1)?.unwrap_or_default()); + + return Err(StageError::MissingStaticFileData { + block: missing_block, + segment: StaticFileSegment::Transactions, + }) + } + + // Unwinds static file + static_file_producer + .prune_transactions(static_file_tx_num.saturating_sub(db_tx_num), input.unwind_to)?; + Ok(UnwindOutput { checkpoint: StageCheckpoint::new(input.unwind_to) .with_entities_stage_checkpoint(stage_checkpoint(provider)?), @@ -246,10 +340,10 @@ impl Stage for BodyStage { // progress in gas as a proxy to size. Execution stage uses a similar approach. fn stage_checkpoint( provider: &DatabaseProviderRW, -) -> Result { +) -> ProviderResult { Ok(EntitiesCheckpoint { - processed: provider.tx_ref().entries::()? 
as u64, - total: provider.tx_ref().entries::()? as u64, + processed: provider.count_entries::()? as u64, + total: (provider.count_entries::()? as u64).saturating_sub(1), }) } @@ -289,6 +383,7 @@ mod tests { // Check that we only synced around `batch_size` blocks even though the number of blocks // synced by the previous stage is higher let output = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); assert_matches!( output, Ok(ExecOutput { checkpoint: StageCheckpoint { @@ -325,6 +420,7 @@ mod tests { // Check that we synced all blocks successfully, even though our `batch_size` allows us to // sync more (if there were more headers) let output = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); assert_matches!( output, Ok(ExecOutput { @@ -362,6 +458,7 @@ mod tests { // Check that we synced at least 10 blocks let first_run = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); assert_matches!( first_run, Ok(ExecOutput { checkpoint: StageCheckpoint { @@ -382,6 +479,7 @@ mod tests { // Check that we synced more blocks let output = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); assert_matches!( output, Ok(ExecOutput { checkpoint: StageCheckpoint { @@ -422,6 +520,7 @@ mod tests { // Check that we synced all blocks successfully, even though our `batch_size` allows us to // sync more (if there were more headers) let output = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); assert_matches!( output, Ok(ExecOutput { checkpoint: StageCheckpoint { @@ -439,16 +538,12 @@ mod tests { .expect("Written block data invalid"); // Delete a transaction - runner - .db() - .commit(|tx| { - let mut tx_cursor = tx.cursor_write::()?; - tx_cursor.last()?.expect("Could not read last transaction"); - tx_cursor.delete_current()?; - Ok(()) - }) - .expect("Could not delete a transaction"); - + let static_file_provider = runner.db().factory.static_file_provider(); + { + let mut static_file_producer = + static_file_provider.latest_writer(StaticFileSegment::Transactions).unwrap(); + static_file_producer.prune_transactions(1, checkpoint.block_number).unwrap(); + } // Unwind all of it let unwind_to = 1; let input = UnwindInput { bad_block: None, checkpoint, unwind_to }; @@ -480,6 +575,7 @@ mod tests { use reth_db::{ cursor::DbCursorRO, models::{StoredBlockBodyIndices, StoredBlockOmmers}, + static_file::HeaderMask, tables, test_utils::TempDatabase, transaction::{DbTx, DbTxMut}, @@ -501,8 +597,13 @@ mod tests { generators::{random_block_range, random_signed_tx}, }, }; - use reth_primitives::{BlockBody, BlockNumber, SealedBlock, SealedHeader, TxNumber, B256}; - use reth_provider::ProviderFactory; + use reth_primitives::{ + BlockBody, BlockHash, BlockNumber, Header, SealedBlock, SealedHeader, + StaticFileSegment, TxNumber, B256, + }; + use reth_provider::{ + providers::StaticFileWriter, HeaderProvider, ProviderFactory, TransactionsProvider, + }; use std::{ collections::{HashMap, VecDeque}, ops::RangeInclusive, @@ -571,24 +672,38 @@ mod tests { fn seed_execution(&mut self, input: ExecInput) -> Result { let start = input.checkpoint().block_number; let end = input.target(); + + let static_file_provider = self.db.factory.static_file_provider(); + let mut rng = generators::rng(); - let blocks = random_block_range(&mut rng, start..=end, GENESIS_HASH, 0..2); + + // Static files do not support gaps in headers, so we need to generate 0 to end + let blocks = random_block_range(&mut 
rng, 0..=end, GENESIS_HASH, 0..2); self.db.insert_headers_with_td(blocks.iter().map(|block| &block.header))?; - if let Some(progress) = blocks.first() { + if let Some(progress) = blocks.get(start as usize) { // Insert last progress data - self.db.commit(|tx| { + { + let tx = self.db.factory.provider_rw()?.into_tx(); + let mut static_file_producer = static_file_provider + .get_writer(start, StaticFileSegment::Transactions)?; + let body = StoredBlockBodyIndices { first_tx_num: 0, tx_count: progress.body.len() as u64, }; + + static_file_producer.set_block_range(0..=progress.number); + body.tx_num_range().try_for_each(|tx_num| { let transaction = random_signed_tx(&mut rng); - tx.put::(tx_num, transaction.into()) + static_file_producer + .append_transaction(tx_num, transaction.into()) + .map(|_| ()) })?; if body.tx_count != 0 { - tx.put::( - body.first_tx_num(), + tx.put::( + body.last_tx_num(), progress.number, )?; } @@ -601,8 +716,10 @@ mod tests { StoredBlockOmmers { ommers: progress.ommers.clone() }, )?; } - Ok(()) - })?; + + static_file_producer.commit()?; + tx.commit()?; + } } self.set_responses(blocks.iter().map(body_by_hash).collect()); Ok(blocks) @@ -633,7 +750,7 @@ mod tests { if let Some(last_tx_id) = self.get_last_tx_id()? { self.db .ensure_no_entry_above::(last_tx_id, |key| key)?; - self.db.ensure_no_entry_above::( + self.db.ensure_no_entry_above::( last_tx_id, |key| key, )?; @@ -663,13 +780,13 @@ mod tests { prev_progress: BlockNumber, highest_block: BlockNumber, ) -> Result<(), TestRunnerError> { + let static_file_provider = self.db.factory.static_file_provider(); + self.db.query(|tx| { // Acquire cursors on body related tables - let mut headers_cursor = tx.cursor_read::()?; let mut bodies_cursor = tx.cursor_read::()?; let mut ommers_cursor = tx.cursor_read::()?; - let mut transaction_cursor = tx.cursor_read::()?; - let mut tx_block_cursor = tx.cursor_read::()?; + let mut tx_block_cursor = tx.cursor_read::()?; let first_body_key = match bodies_cursor.first()? { Some((key, _)) => key, @@ -678,6 +795,7 @@ mod tests { let mut prev_number: Option = None; + for entry in bodies_cursor.walk(Some(first_body_key))? { let (number, body) = entry?; @@ -695,7 +813,7 @@ mod tests { "We wrote a block body outside of our synced range. 
Found block with number {number}, highest block according to stage is {highest_block}", ); - let (_, header) = headers_cursor.seek_exact(number)?.expect("to be present"); + let header = static_file_provider.header_by_number(number)?.expect("to be present"); // Validate that ommers exist if any let stored_ommers = ommers_cursor.seek_exact(number)?; if header.ommers_hash_is_empty() { @@ -712,11 +830,9 @@ mod tests { } for tx_id in body.tx_num_range() { - let tx_entry = transaction_cursor.seek_exact(tx_id)?; - assert!(tx_entry.is_some(), "Transaction is missing."); + assert!(static_file_provider.transaction_by_id(tx_id)?.is_some(), "Transaction is missing."); } - prev_number = Some(number); } Ok(()) @@ -775,16 +891,14 @@ mod tests { &mut self, range: RangeInclusive, ) -> DownloadResult<()> { - let provider = self.provider_factory.provider()?; - let mut header_cursor = provider.tx_ref().cursor_read::()?; - - let mut canonical_cursor = - provider.tx_ref().cursor_read::()?; - let walker = canonical_cursor.walk_range(range)?; - - for entry in walker { - let (num, hash) = entry?; - let (_, header) = header_cursor.seek_exact(num)?.expect("missing header"); + let static_file_provider = self.provider_factory.static_file_provider(); + + for header in static_file_provider.fetch_range_iter( + StaticFileSegment::Headers, + *range.start()..*range.end() + 1, + |cursor, number| cursor.get_two::>(number.into()), + )? { + let (header, hash) = header?; self.headers.push_back(header.seal(hash)); } diff --git a/crates/stages/src/stages/execution.rs b/crates/stages/src/stages/execution.rs index 5de44ab7049e..724603e41c03 100644 --- a/crates/stages/src/stages/execution.rs +++ b/crates/stages/src/stages/execution.rs @@ -7,21 +7,23 @@ use reth_db::{ cursor::{DbCursorRO, DbCursorRW, DbDupCursorRO}, database::Database, models::BlockNumberAddress, + static_file::HeaderMask, tables, transaction::{DbTx, DbTxMut}, }; -use reth_interfaces::db::DatabaseError; use reth_primitives::{ stage::{ CheckpointBlockRange, EntitiesCheckpoint, ExecutionCheckpoint, StageCheckpoint, StageId, }, - BlockNumber, Header, PruneModes, U256, + BlockNumber, Header, PruneModes, StaticFileSegment, U256, }; use reth_provider::{ + providers::{StaticFileProvider, StaticFileProviderRWRefMut, StaticFileWriter}, BlockReader, DatabaseProviderRW, ExecutorFactory, HeaderProvider, LatestStateProviderRef, - OriginalValuesKnown, ProviderError, TransactionVariant, + OriginalValuesKnown, ProviderError, StatsReader, TransactionVariant, }; use std::{ + cmp::Ordering, ops::RangeInclusive, time::{Duration, Instant}, }; @@ -33,7 +35,7 @@ use tracing::*; /// Input tables: /// - [tables::CanonicalHeaders] get next block to execute. /// - [tables::Headers] get for revm environment variables. 
-/// - [tables::HeaderTD] +/// - [tables::HeaderTerminalDifficulties] /// - [tables::BlockBodyIndices] to get tx number /// - [tables::Transactions] to execute /// @@ -47,13 +49,14 @@ use tracing::*; /// - [tables::PlainAccountState] /// - [tables::PlainStorageState] /// - [tables::Bytecodes] -/// - [tables::AccountChangeSet] -/// - [tables::StorageChangeSet] +/// - [tables::AccountChangeSets] +/// - [tables::StorageChangeSets] /// /// For unwinds we are accessing: /// - [tables::BlockBodyIndices] get tx index to know what needs to be unwinded -/// - [tables::AccountHistory] to remove change set and apply old values to -/// - [tables::PlainAccountState] [tables::StorageHistory] to remove change set and apply old values +/// - [tables::AccountsHistory] to remove change set and apply old values to +/// - [tables::PlainAccountState] [tables::StoragesHistory] to remove change set and apply old +/// values /// to [tables::PlainStorageState] // false positive, we cannot derive it if !DB: Debug. #[allow(missing_debug_implementations)] @@ -120,17 +123,26 @@ impl ExecutionStage { let start_block = input.next_block(); let max_block = input.target(); let prune_modes = self.adjust_prune_modes(provider, start_block, max_block)?; + let static_file_provider = provider.static_file_provider(); + + // We only use static files for Receipts, if there is no receipt pruning of any kind. + let mut static_file_producer = None; + if self.prune_modes.receipts.is_none() && self.prune_modes.receipts_log_filter.is_empty() { + static_file_producer = Some(prepare_static_file_producer(provider, start_block)?); + } // Build executor - let mut executor = - self.executor_factory.with_state(LatestStateProviderRef::new(provider.tx_ref())); + let mut executor = self.executor_factory.with_state(LatestStateProviderRef::new( + provider.tx_ref(), + provider.static_file_provider().clone(), + )); executor.set_prune_modes(prune_modes); executor.set_tip(max_block); // Progress tracking let mut stage_progress = start_block; let mut stage_checkpoint = - execution_checkpoint(provider, start_block, max_block, input.checkpoint())?; + execution_checkpoint(static_file_provider, start_block, max_block, input.checkpoint())?; let mut fetch_block_duration = Duration::default(); let mut execution_duration = Duration::default(); @@ -195,7 +207,11 @@ impl ExecutionStage { let time = Instant::now(); // write output - state.write_to_db(provider.tx_ref(), OriginalValuesKnown::Yes)?; + state.write_to_storage( + provider.tx_ref(), + static_file_producer, + OriginalValuesKnown::Yes, + )?; let db_write_duration = time.elapsed(); debug!( target: "sync::stages::execution", @@ -235,7 +251,7 @@ impl ExecutionStage { // If we're not executing MerkleStage from scratch (by threshold or first-sync), then erase // changeset related pruning configurations if !(max_block - start_block > self.external_clean_threshold || - provider.tx_ref().entries::()?.is_zero()) + provider.count_entries::()?.is_zero()) { prune_modes.account_history = None; prune_modes.storage_history = None; @@ -244,12 +260,12 @@ impl ExecutionStage { } } -fn execution_checkpoint( - provider: &DatabaseProviderRW, +fn execution_checkpoint( + provider: &StaticFileProvider, start_block: BlockNumber, max_block: BlockNumber, checkpoint: StageCheckpoint, -) -> Result { +) -> Result { Ok(match checkpoint.execution_stage_checkpoint() { // If checkpoint block range fully matches our range, // we take the previously used stage checkpoint as-is. 
@@ -311,15 +327,20 @@ fn execution_checkpoint( }) } -fn calculate_gas_used_from_headers( - provider: &DatabaseProviderRW, +fn calculate_gas_used_from_headers( + provider: &StaticFileProvider, range: RangeInclusive, -) -> Result { +) -> Result { let mut gas_total = 0; let start = Instant::now(); - for entry in provider.tx_ref().cursor_read::()?.walk_range(range.clone())? { - let (_, Header { gas_used, .. }) = entry?; + + for entry in provider.fetch_range_iter( + StaticFileSegment::Headers, + *range.start()..*range.end() + 1, + |cursor, number| cursor.get_one::>(number.into()), + )? { + let Header { gas_used, .. } = entry?; gas_total += gas_used; } @@ -352,8 +373,8 @@ impl Stage for ExecutionStage { ) -> Result { let tx = provider.tx_ref(); // Acquire changeset cursors - let mut account_changeset = tx.cursor_dup_write::()?; - let mut storage_changeset = tx.cursor_dup_write::()?; + let mut account_changeset = tx.cursor_dup_write::()?; + let mut storage_changeset = tx.cursor_dup_write::()?; let (range, unwind_to, _) = input.unwind_block_range_with_threshold(self.thresholds.max_blocks.unwrap_or(u64::MAX)); @@ -399,7 +420,7 @@ impl Stage for ExecutionStage { } // Discard unwinded changesets - provider.unwind_table_by_num::(unwind_to)?; + provider.unwind_table_by_num::(unwind_to)?; let mut rev_storage_changeset_walker = storage_changeset.walk_back(None)?; while let Some((key, _)) = rev_storage_changeset_walker.next().transpose()? { @@ -419,17 +440,37 @@ impl Stage for ExecutionStage { let mut stage_checkpoint = input.checkpoint.execution_stage_checkpoint(); // Unwind all receipts for transactions in the block range - let mut cursor = tx.cursor_write::()?; - let mut reverse_walker = cursor.walk_back(None)?; + if self.prune_modes.receipts.is_none() && self.prune_modes.receipts_log_filter.is_empty() { + // We only use static files for Receipts, if there is no receipt pruning of any kind. - while let Some(Ok((tx_number, receipt))) = reverse_walker.next() { - if tx_number < first_tx_num { - break - } - reverse_walker.delete_current()?; + // prepare_static_file_producer does a consistency check that will unwind static files + // if the expected highest receipt in the files is higher than the database. + // Which is essentially what happens here when we unwind this stage. + let _static_file_producer = prepare_static_file_producer(provider, *range.start())?; + // Update the checkpoint. if let Some(stage_checkpoint) = stage_checkpoint.as_mut() { - stage_checkpoint.progress.processed -= receipt.cumulative_gas_used; + for block_number in range { + stage_checkpoint.progress.processed -= provider + .block_by_number(block_number)? + .ok_or_else(|| ProviderError::BlockNotFound(block_number.into()))? + .gas_used; + } + } + } else { + // We use the database for Receipts, if there is any kind of receipt pruning/filtering. + let mut cursor = tx.cursor_write::()?; + let mut reverse_walker = cursor.walk_back(None)?; + + while let Some(Ok((tx_number, receipt))) = reverse_walker.next() { + if tx_number < first_tx_num { + break + } + reverse_walker.delete_current()?; + + if let Some(stage_checkpoint) = stage_checkpoint.as_mut() { + stage_checkpoint.progress.processed -= receipt.cumulative_gas_used; + } } } @@ -492,22 +533,83 @@ impl ExecutionStageThresholds { } } +/// Returns a `StaticFileProviderRWRefMut` static file producer after performing a consistency +/// check.
+/// +/// This function compares the highest receipt number recorded in the database with that in the +/// static file to detect any discrepancies due to unexpected shutdowns or database rollbacks. **If +/// the height in the static file is higher**, it rolls back (unwinds) the static file. +/// **Conversely, if the height in the database is higher**, it triggers a rollback in the database +/// (by returning [`StageError`]) until the heights in both the database and static file match. +fn prepare_static_file_producer<'a, 'b, DB: Database>( + provider: &'b DatabaseProviderRW, + start_block: u64, +) -> Result, StageError> +where + 'b: 'a, +{ + // Get next expected receipt number + let tx = provider.tx_ref(); + let next_receipt_num = tx + .cursor_read::()? + .seek_exact(start_block)? + .map(|(_, value)| value.first_tx_num) + .unwrap_or(0); + + // Get next expected receipt number in static files + let static_file_provider = provider.static_file_provider(); + let next_static_file_receipt_num = static_file_provider + .get_highest_static_file_tx(StaticFileSegment::Receipts) + .map(|num| num + 1) + .unwrap_or(0); + + let mut static_file_producer = + static_file_provider.get_writer(start_block, StaticFileSegment::Receipts)?; + + // Check if we had any unexpected shutdown after committing to static files, but + // NOT committing to database. + match next_static_file_receipt_num.cmp(&next_receipt_num) { + Ordering::Greater => static_file_producer.prune_receipts( + next_static_file_receipt_num - next_receipt_num, + start_block.saturating_sub(1), + )?, + Ordering::Less => { + let last_block = static_file_provider + .get_highest_static_file_block(StaticFileSegment::Receipts) + .unwrap_or(0); + + let missing_block = Box::new( + tx.get::(last_block + 1)?.unwrap_or_default().seal_slow(), + ); + + return Err(StageError::MissingStaticFileData { + block: missing_block, + segment: StaticFileSegment::Receipts, + }) + } + Ordering::Equal => {} + } + + Ok(static_file_producer) +} + #[cfg(test)] mod tests { use super::*; use crate::test_utils::TestStageDB; use alloy_rlp::Decodable; use assert_matches::assert_matches; - use reth_db::{models::AccountBeforeTx, test_utils::create_test_rw_db}; + use reth_db::models::AccountBeforeTx; use reth_interfaces::executor::BlockValidationError; use reth_node_ethereum::EthEvmConfig; use reth_primitives::{ - address, hex_literal::hex, keccak256, stage::StageUnitCheckpoint, Account, Bytecode, - ChainSpecBuilder, SealedBlock, StorageEntry, B256, MAINNET, + address, hex_literal::hex, keccak256, stage::StageUnitCheckpoint, Account, Address, + Bytecode, ChainSpecBuilder, PruneMode, ReceiptsLogPruneConfig, SealedBlock, StorageEntry, + B256, }; - use reth_provider::{AccountReader, BlockWriter, ProviderFactory, ReceiptProvider}; + use reth_provider::{test_utils::create_test_provider_factory, AccountReader, ReceiptProvider}; use reth_revm::EvmProcessorFactory; - use std::sync::Arc; + use std::{collections::BTreeMap, sync::Arc}; fn stage() -> ExecutionStage> { let executor_factory = EvmProcessorFactory::new( @@ -529,9 +631,7 @@ mod tests { #[test] fn execution_checkpoint_matches() { - let state_db = create_test_rw_db(); - let factory = ProviderFactory::new(state_db.as_ref(), MAINNET.clone()); - let tx = factory.provider_rw().unwrap(); + let factory = create_test_provider_factory(); let previous_stage_checkpoint = ExecutionCheckpoint { block_range: CheckpointBlockRange { from: 0, to: 0 }, progress: EntitiesCheckpoint { processed: 1, total: 2 }, }; let previous_checkpoint = StageCheckpoint { block_number: 0, stage_checkpoint: Some(StageUnitCheckpoint::Execution(previous_stage_checkpoint)), }; let stage_checkpoint = execution_checkpoint(
&factory.static_file_provider(), previous_stage_checkpoint.block_range.from, previous_stage_checkpoint.block_range.to, previous_checkpoint, @@ -554,8 +654,7 @@ mod tests { #[test] fn execution_checkpoint_precedes() { - let state_db = create_test_rw_db(); - let factory = ProviderFactory::new(state_db.as_ref(), MAINNET.clone()); + let factory = create_test_provider_factory(); let provider = factory.provider_rw().unwrap(); let mut genesis_rlp = hex!("f901faf901f5a00000000000000000000000000000000000000000000000000000000000000000a01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa045571b40ae66ca7480791bbb2887286e4e4c4b1b298b191c889d6959023a32eda056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421a056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421b901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000808502540be400808000a00000000000000000000000000000000000000000000000000000000000000000880000000000000000c0c0").as_slice(); @@ -563,7 +662,7 @@ mod tests { let mut block_rlp = hex!("f90262f901f9a075c371ba45999d87f4542326910a11af515897aebce5265d3f6acd1f1161f82fa01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa098f2dcd87c8ae4083e7017a05456c14eea4b1db2032126e27b3b1563d57d7cc0a08151d548273f6683169524b66ca9fe338b9ce42bc3540046c828fd939ae23bcba03f4e5c2ec5b2170b711d97ee755c160457bb58d8daa338e835ec02ae6860bbabb901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000018502540be40082a8798203e800a00000000000000000000000000000000000000000000000000000000000000000880000000000000000f863f861800a8405f5e10094100000000000000000000000000000000000000080801ba07e09e26678ed4fac08a249ebe8ed680bf9051a5e14ad223e4b2b9d26e0208f37a05f6e3f188e3e6eab7d7d3b6568f5eac7d687b08d307d3154ccd8c87b4630509bc0").as_slice(); let block = SealedBlock::decode(&mut block_rlp).unwrap(); provider - .insert_block( + .insert_historical_block( genesis .try_seal_with_senders() .map_err(|_| BlockValidationError::SenderRecoveryError) @@ -571,7 +670,15 @@ mod tests { None, ) .unwrap(); - provider.insert_block(block.clone().try_seal_with_senders().unwrap(), None).unwrap(); + provider + .insert_historical_block(block.clone().try_seal_with_senders().unwrap(), None) + .unwrap(); + provider + .static_file_provider() + .latest_writer(StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); provider.commit().unwrap(); let previous_stage_checkpoint = ExecutionCheckpoint { @@ -583,8 +690,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Execution(previous_stage_checkpoint)), }; - let 
provider = factory.provider_rw().unwrap(); - let stage_checkpoint = execution_checkpoint(&provider, 1, 1, previous_checkpoint); + let stage_checkpoint = + execution_checkpoint(&factory.static_file_provider(), 1, 1, previous_checkpoint); assert_matches!(stage_checkpoint, Ok(ExecutionCheckpoint { block_range: CheckpointBlockRange { from: 1, to: 1 }, @@ -598,16 +705,23 @@ mod tests { #[test] fn execution_checkpoint_recalculate_full_previous_some() { - let state_db = create_test_rw_db(); - let factory = ProviderFactory::new(state_db.as_ref(), MAINNET.clone()); + let factory = create_test_provider_factory(); let provider = factory.provider_rw().unwrap(); let mut genesis_rlp = hex!("f901faf901f5a00000000000000000000000000000000000000000000000000000000000000000a01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa045571b40ae66ca7480791bbb2887286e4e4c4b1b298b191c889d6959023a32eda056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421a056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421b901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000808502540be400808000a00000000000000000000000000000000000000000000000000000000000000000880000000000000000c0c0").as_slice(); let genesis = SealedBlock::decode(&mut genesis_rlp).unwrap(); let mut block_rlp = hex!("f90262f901f9a075c371ba45999d87f4542326910a11af515897aebce5265d3f6acd1f1161f82fa01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa098f2dcd87c8ae4083e7017a05456c14eea4b1db2032126e27b3b1563d57d7cc0a08151d548273f6683169524b66ca9fe338b9ce42bc3540046c828fd939ae23bcba03f4e5c2ec5b2170b711d97ee755c160457bb58d8daa338e835ec02ae6860bbabb901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000018502540be40082a8798203e800a00000000000000000000000000000000000000000000000000000000000000000880000000000000000f863f861800a8405f5e10094100000000000000000000000000000000000000080801ba07e09e26678ed4fac08a249ebe8ed680bf9051a5e14ad223e4b2b9d26e0208f37a05f6e3f188e3e6eab7d7d3b6568f5eac7d687b08d307d3154ccd8c87b4630509bc0").as_slice(); let block = SealedBlock::decode(&mut block_rlp).unwrap(); - provider.insert_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); - provider.insert_block(block.clone().try_seal_with_senders().unwrap(), None).unwrap(); + provider.insert_historical_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); + provider + .insert_historical_block(block.clone().try_seal_with_senders().unwrap(), None) + .unwrap(); + provider + .static_file_provider() + 
.latest_writer(StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); provider.commit().unwrap(); let previous_stage_checkpoint = ExecutionCheckpoint { @@ -619,8 +733,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Execution(previous_stage_checkpoint)), }; - let provider = factory.provider_rw().unwrap(); - let stage_checkpoint = execution_checkpoint(&provider, 1, 1, previous_checkpoint); + let stage_checkpoint = + execution_checkpoint(&factory.static_file_provider(), 1, 1, previous_checkpoint); assert_matches!(stage_checkpoint, Ok(ExecutionCheckpoint { block_range: CheckpointBlockRange { from: 1, to: 1 }, @@ -634,22 +748,29 @@ mod tests { #[test] fn execution_checkpoint_recalculate_full_previous_none() { - let state_db = create_test_rw_db(); - let factory = ProviderFactory::new(state_db.as_ref(), MAINNET.clone()); + let factory = create_test_provider_factory(); let provider = factory.provider_rw().unwrap(); let mut genesis_rlp = hex!("f901faf901f5a00000000000000000000000000000000000000000000000000000000000000000a01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa045571b40ae66ca7480791bbb2887286e4e4c4b1b298b191c889d6959023a32eda056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421a056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421b901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000808502540be400808000a00000000000000000000000000000000000000000000000000000000000000000880000000000000000c0c0").as_slice(); let genesis = SealedBlock::decode(&mut genesis_rlp).unwrap(); let mut block_rlp = hex!("f90262f901f9a075c371ba45999d87f4542326910a11af515897aebce5265d3f6acd1f1161f82fa01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa098f2dcd87c8ae4083e7017a05456c14eea4b1db2032126e27b3b1563d57d7cc0a08151d548273f6683169524b66ca9fe338b9ce42bc3540046c828fd939ae23bcba03f4e5c2ec5b2170b711d97ee755c160457bb58d8daa338e835ec02ae6860bbabb901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000018502540be40082a8798203e800a00000000000000000000000000000000000000000000000000000000000000000880000000000000000f863f861800a8405f5e10094100000000000000000000000000000000000000080801ba07e09e26678ed4fac08a249ebe8ed680bf9051a5e14ad223e4b2b9d26e0208f37a05f6e3f188e3e6eab7d7d3b6568f5eac7d687b08d307d3154ccd8c87b4630509bc0").as_slice(); let block = SealedBlock::decode(&mut block_rlp).unwrap(); - provider.insert_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); - provider.insert_block(block.clone().try_seal_with_senders().unwrap(), 
None).unwrap(); + provider.insert_historical_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); + provider + .insert_historical_block(block.clone().try_seal_with_senders().unwrap(), None) + .unwrap(); + provider + .static_file_provider() + .latest_writer(StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); provider.commit().unwrap(); let previous_checkpoint = StageCheckpoint { block_number: 1, stage_checkpoint: None }; - let provider = factory.provider_rw().unwrap(); - let stage_checkpoint = execution_checkpoint(&provider, 1, 1, previous_checkpoint); + let stage_checkpoint = + execution_checkpoint(&factory.static_file_provider(), 1, 1, previous_checkpoint); assert_matches!(stage_checkpoint, Ok(ExecutionCheckpoint { block_range: CheckpointBlockRange { from: 1, to: 1 }, @@ -664,16 +785,23 @@ mod tests { async fn sanity_execution_of_block() { // TODO cleanup the setup after https://github.com/paradigmxyz/reth/issues/332 // is merged as it has similar framework - let state_db = create_test_rw_db(); - let factory = ProviderFactory::new(state_db.as_ref(), MAINNET.clone()); + let factory = create_test_provider_factory(); let provider = factory.provider_rw().unwrap(); let input = ExecInput { target: Some(1), checkpoint: None }; let mut genesis_rlp = hex!("f901faf901f5a00000000000000000000000000000000000000000000000000000000000000000a01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa045571b40ae66ca7480791bbb2887286e4e4c4b1b298b191c889d6959023a32eda056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421a056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421b901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000808502540be400808000a00000000000000000000000000000000000000000000000000000000000000000880000000000000000c0c0").as_slice(); let genesis = SealedBlock::decode(&mut genesis_rlp).unwrap(); let mut block_rlp = 
hex!("f90262f901f9a075c371ba45999d87f4542326910a11af515897aebce5265d3f6acd1f1161f82fa01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa098f2dcd87c8ae4083e7017a05456c14eea4b1db2032126e27b3b1563d57d7cc0a08151d548273f6683169524b66ca9fe338b9ce42bc3540046c828fd939ae23bcba03f4e5c2ec5b2170b711d97ee755c160457bb58d8daa338e835ec02ae6860bbabb901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000018502540be40082a8798203e800a00000000000000000000000000000000000000000000000000000000000000000880000000000000000f863f861800a8405f5e10094100000000000000000000000000000000000000080801ba07e09e26678ed4fac08a249ebe8ed680bf9051a5e14ad223e4b2b9d26e0208f37a05f6e3f188e3e6eab7d7d3b6568f5eac7d687b08d307d3154ccd8c87b4630509bc0").as_slice(); let block = SealedBlock::decode(&mut block_rlp).unwrap(); - provider.insert_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); - provider.insert_block(block.clone().try_seal_with_senders().unwrap(), None).unwrap(); + provider.insert_historical_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); + provider + .insert_historical_block(block.clone().try_seal_with_senders().unwrap(), None) + .unwrap(); + provider + .static_file_provider() + .latest_writer(StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); provider.commit().unwrap(); // insert pre state @@ -700,69 +828,101 @@ mod tests { db_tx.put::(code_hash, Bytecode::new_raw(code.to_vec().into())).unwrap(); provider.commit().unwrap(); - let provider = factory.provider_rw().unwrap(); - let mut execution_stage: ExecutionStage> = stage(); - let output = execution_stage.execute(&provider, input).unwrap(); - provider.commit().unwrap(); - assert_matches!(output, ExecOutput { - checkpoint: StageCheckpoint { - block_number: 1, - stage_checkpoint: Some(StageUnitCheckpoint::Execution(ExecutionCheckpoint { - block_range: CheckpointBlockRange { - from: 1, - to: 1, - }, - progress: EntitiesCheckpoint { - processed, - total - } - })) - }, - done: true - } if processed == total && total == block.gas_used); - - let provider = factory.provider().unwrap(); - - // check post state - let account1 = address!("1000000000000000000000000000000000000000"); - let account1_info = - Account { balance: U256::ZERO, nonce: 0x00, bytecode_hash: Some(code_hash) }; - let account2 = address!("2adc25665018aa1fe0e6bc666dac8fc2697ff9ba"); - let account2_info = Account { - balance: U256::from(0x1bc16d674ece94bau128), - nonce: 0x00, - bytecode_hash: None, - }; - let account3 = address!("a94f5374fce5edbc8e2a8697c15331677e6ebf0b"); - let account3_info = Account { - balance: U256::from(0x3635c9adc5de996b46u128), - nonce: 0x01, - bytecode_hash: None, - }; + // execute - // assert accounts - assert_eq!( - provider.basic_account(account1), - Ok(Some(account1_info)), - "Post changed of a account" - ); - assert_eq!( - provider.basic_account(account2), - Ok(Some(account2_info)), - "Post changed of a account" - ); - assert_eq!( - provider.basic_account(account3), - Ok(Some(account3_info)), - "Post changed of a account" - ); - // 
assert storage - // Get on dupsort would return only first value. This is good enough for this test. - assert_eq!( - provider.tx_ref().get::(account1), - Ok(Some(StorageEntry { key: B256::with_last_byte(1), value: U256::from(2) })), - "Post changed of a account" - ); + // If there is a pruning configuration, then it's forced to use the database. + // This way we test both cases. + let modes = [None, Some(PruneModes::none())]; + let random_filter = + ReceiptsLogPruneConfig(BTreeMap::from([(Address::random(), PruneMode::Full)])); + + // Tests node with database and node with static files + for mut mode in modes { + let provider = factory.provider_rw().unwrap(); + + if let Some(mode) = &mut mode { + // Simulating a full node where we write receipts to database + mode.receipts_log_filter = random_filter.clone(); + } + + let mut execution_stage: ExecutionStage> = stage(); + execution_stage.prune_modes = mode.clone().unwrap_or_default(); + + let output = execution_stage.execute(&provider, input).unwrap(); + provider.commit().unwrap(); + + assert_matches!(output, ExecOutput { + checkpoint: StageCheckpoint { + block_number: 1, + stage_checkpoint: Some(StageUnitCheckpoint::Execution(ExecutionCheckpoint { + block_range: CheckpointBlockRange { + from: 1, + to: 1, + }, + progress: EntitiesCheckpoint { + processed, + total + } + })) + }, + done: true + } if processed == total && total == block.gas_used); + + let provider = factory.provider().unwrap(); + + // check post state + let account1 = address!("1000000000000000000000000000000000000000"); + let account1_info = + Account { balance: U256::ZERO, nonce: 0x00, bytecode_hash: Some(code_hash) }; + let account2 = address!("2adc25665018aa1fe0e6bc666dac8fc2697ff9ba"); + let account2_info = Account { + balance: U256::from(0x1bc16d674ece94bau128), + nonce: 0x00, + bytecode_hash: None, + }; + let account3 = address!("a94f5374fce5edbc8e2a8697c15331677e6ebf0b"); + let account3_info = Account { + balance: U256::from(0x3635c9adc5de996b46u128), + nonce: 0x01, + bytecode_hash: None, + }; + + // assert accounts + assert_eq!( + provider.basic_account(account1), + Ok(Some(account1_info)), + "Post changed of a account" + ); + assert_eq!( + provider.basic_account(account2), + Ok(Some(account2_info)), + "Post changed of a account" + ); + assert_eq!( + provider.basic_account(account3), + Ok(Some(account3_info)), + "Post changed of a account" + ); + // assert storage + // Get on dupsort would return only first value. This is good enough for this test. 
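Editor's note: the rewritten test body above runs the same execute/assert/unwind sequence twice, once with no prune configuration (receipts end up in static files) and once with a receipts-log filter set (receipts are forced into the database). A minimal, self-contained sketch of that two-pass pattern, assuming illustrative names (`StorageMode`, `run_case`) rather than reth types:

// Illustrative only: run the same execute/assert/unwind sequence once per
// storage backend, mirroring the `modes` loop in the test above.
#[derive(Clone, Copy, Debug)]
enum StorageMode {
    StaticFiles, // no prune configuration: receipts land in static files
    Database,    // receipts-log filter set: receipts stay in the database
}

fn run_case(mode: StorageMode) {
    // ...configure the stage for `mode`, execute, assert post-state, unwind...
    println!("executed and unwound with {mode:?}");
}

fn main() {
    for mode in [StorageMode::StaticFiles, StorageMode::Database] {
        run_case(mode);
    }
}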
+ assert_eq!( + provider.tx_ref().get::(account1), + Ok(Some(StorageEntry { key: B256::with_last_byte(1), value: U256::from(2) })), + "Post changed of a account" + ); + + let provider = factory.provider_rw().unwrap(); + let mut stage = stage(); + stage.prune_modes = mode.unwrap_or_default(); + + let _result = stage + .unwind( + &provider, + UnwindInput { checkpoint: output.checkpoint, unwind_to: 0, bad_block: None }, + ) + .unwrap(); + provider.commit().unwrap(); + } } #[tokio::test] @@ -770,16 +930,23 @@ mod tests { // TODO cleanup the setup after https://github.com/paradigmxyz/reth/issues/332 // is merged as it has similar framework - let state_db = create_test_rw_db(); - let factory = ProviderFactory::new(state_db.as_ref(), MAINNET.clone()); + let factory = create_test_provider_factory(); let provider = factory.provider_rw().unwrap(); let input = ExecInput { target: Some(1), checkpoint: None }; let mut genesis_rlp = hex!("f901faf901f5a00000000000000000000000000000000000000000000000000000000000000000a01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa045571b40ae66ca7480791bbb2887286e4e4c4b1b298b191c889d6959023a32eda056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421a056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421b901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000808502540be400808000a00000000000000000000000000000000000000000000000000000000000000000880000000000000000c0c0").as_slice(); let genesis = SealedBlock::decode(&mut genesis_rlp).unwrap(); let mut block_rlp = hex!("f90262f901f9a075c371ba45999d87f4542326910a11af515897aebce5265d3f6acd1f1161f82fa01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa098f2dcd87c8ae4083e7017a05456c14eea4b1db2032126e27b3b1563d57d7cc0a08151d548273f6683169524b66ca9fe338b9ce42bc3540046c828fd939ae23bcba03f4e5c2ec5b2170b711d97ee755c160457bb58d8daa338e835ec02ae6860bbabb901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000018502540be40082a8798203e800a00000000000000000000000000000000000000000000000000000000000000000880000000000000000f863f861800a8405f5e10094100000000000000000000000000000000000000080801ba07e09e26678ed4fac08a249ebe8ed680bf9051a5e14ad223e4b2b9d26e0208f37a05f6e3f188e3e6eab7d7d3b6568f5eac7d687b08d307d3154ccd8c87b4630509bc0").as_slice(); let block = SealedBlock::decode(&mut block_rlp).unwrap(); - provider.insert_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); - provider.insert_block(block.clone().try_seal_with_senders().unwrap(), None).unwrap(); + 
provider.insert_historical_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); + provider + .insert_historical_block(block.clone().try_seal_with_senders().unwrap(), None) + .unwrap(); + provider + .static_file_provider() + .latest_writer(StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); provider.commit().unwrap(); // variables @@ -801,44 +968,77 @@ mod tests { provider.commit().unwrap(); // execute - let provider = factory.provider_rw().unwrap(); - let mut execution_stage = stage(); - let result = execution_stage.execute(&provider, input).unwrap(); - provider.commit().unwrap(); - - let provider = factory.provider_rw().unwrap(); - let mut stage = stage(); - let result = stage - .unwind( - &provider, - UnwindInput { checkpoint: result.checkpoint, unwind_to: 0, bad_block: None }, - ) - .unwrap(); - - assert_matches!(result, UnwindOutput { - checkpoint: StageCheckpoint { - block_number: 0, - stage_checkpoint: Some(StageUnitCheckpoint::Execution(ExecutionCheckpoint { - block_range: CheckpointBlockRange { - from: 1, - to: 1, - }, - progress: EntitiesCheckpoint { - processed: 0, - total - } - })) + let mut provider = factory.provider_rw().unwrap(); + + // If there is a pruning configuration, then it's forced to use the database. + // This way we test both cases. + let modes = [None, Some(PruneModes::none())]; + let random_filter = + ReceiptsLogPruneConfig(BTreeMap::from([(Address::random(), PruneMode::Full)])); + + // Tests node with database and node with static files + for mut mode in modes { + if let Some(mode) = &mut mode { + // Simulating a full node where we write receipts to database + mode.receipts_log_filter = random_filter.clone(); } - } if total == block.gas_used); - // assert unwind stage - assert_eq!(provider.basic_account(acc1), Ok(Some(acc1_info)), "Pre changed of a account"); - assert_eq!(provider.basic_account(acc2), Ok(Some(acc2_info)), "Post changed of a account"); + // Test Execution + let mut execution_stage = stage(); + execution_stage.prune_modes = mode.clone().unwrap_or_default(); + + let result = execution_stage.execute(&provider, input).unwrap(); + provider.commit().unwrap(); - let miner_acc = address!("2adc25665018aa1fe0e6bc666dac8fc2697ff9ba"); - assert_eq!(provider.basic_account(miner_acc), Ok(None), "Third account should be unwound"); + // Test Unwind + provider = factory.provider_rw().unwrap(); + let mut stage = stage(); + stage.prune_modes = mode.unwrap_or_default(); - assert_eq!(provider.receipt(0), Ok(None), "First receipt should be unwound"); + let result = stage + .unwind( + &provider, + UnwindInput { checkpoint: result.checkpoint, unwind_to: 0, bad_block: None }, + ) + .unwrap(); + + assert_matches!(result, UnwindOutput { + checkpoint: StageCheckpoint { + block_number: 0, + stage_checkpoint: Some(StageUnitCheckpoint::Execution(ExecutionCheckpoint { + block_range: CheckpointBlockRange { + from: 1, + to: 1, + }, + progress: EntitiesCheckpoint { + processed: 0, + total + } + })) + } + } if total == block.gas_used); + + // assert unwind stage + assert_eq!( + provider.basic_account(acc1), + Ok(Some(acc1_info)), + "Pre changed of a account" + ); + assert_eq!( + provider.basic_account(acc2), + Ok(Some(acc2_info)), + "Post changed of a account" + ); + + let miner_acc = address!("2adc25665018aa1fe0e6bc666dac8fc2697ff9ba"); + assert_eq!( + provider.basic_account(miner_acc), + Ok(None), + "Third account should be unwound" + ); + + assert_eq!(provider.receipt(0), Ok(None), "First receipt should be unwound"); + } } #[tokio::test] @@ -850,8 
+1050,16 @@ mod tests { let genesis = SealedBlock::decode(&mut genesis_rlp).unwrap(); let mut block_rlp = hex!("f9025ff901f7a0c86e8cc0310ae7c531c758678ddbfd16fc51c8cef8cec650b032de9869e8b94fa01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa050554882fbbda2c2fd93fdc466db9946ea262a67f7a76cc169e714f105ab583da00967f09ef1dfed20c0eacfaa94d5cd4002eda3242ac47eae68972d07b106d192a0e3c8b47fbfc94667ef4cceb17e5cc21e3b1eebd442cebb27f07562b33836290db90100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008302000001830f42408238108203e800a00000000000000000000000000000000000000000000000000000000000000000880000000000000000f862f860800a83061a8094095e7baea6a6c7c4c2dfeb977efac326af552d8780801ba072ed817487b84ba367d15d2f039b5fc5f087d0a8882fbdf73e8cb49357e1ce30a0403d800545b8fc544f92ce8124e2255f8c3c6af93f28243a120585d4c4c6a2a3c0").as_slice(); let block = SealedBlock::decode(&mut block_rlp).unwrap(); - provider.insert_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); - provider.insert_block(block.clone().try_seal_with_senders().unwrap(), None).unwrap(); + provider.insert_historical_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); + provider + .insert_historical_block(block.clone().try_seal_with_senders().unwrap(), None) + .unwrap(); + provider + .static_file_provider() + .latest_writer(StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); provider.commit().unwrap(); // variables @@ -940,8 +1148,8 @@ mod tests { ); assert!(plain_storage.is_empty()); - let account_changesets = test_db.table::().unwrap(); - let storage_changesets = test_db.table::().unwrap(); + let account_changesets = test_db.table::().unwrap(); + let storage_changesets = test_db.table::().unwrap(); assert_eq!( account_changesets, diff --git a/crates/stages/src/stages/finish.rs b/crates/stages/src/stages/finish.rs index 341be77dd1e1..e0e0057c3c7e 100644 --- a/crates/stages/src/stages/finish.rs +++ b/crates/stages/src/stages/finish.rs @@ -45,6 +45,7 @@ mod tests { generators::{random_header, random_header_range}, }; use reth_primitives::SealedHeader; + use reth_provider::providers::StaticFileWriter; stage_test_suite_ext!(FinishTestRunner, finish); diff --git a/crates/stages/src/stages/hashing_account.rs b/crates/stages/src/stages/hashing_account.rs index 6d098d0177de..13b8b4a53483 100644 --- a/crates/stages/src/stages/hashing_account.rs +++ b/crates/stages/src/stages/hashing_account.rs @@ -8,7 +8,7 @@ use reth_db::{ transaction::{DbTx, DbTxMut}, RawKey, RawTable, }; -use reth_interfaces::db::DatabaseError; +use reth_interfaces::provider::ProviderResult; use reth_primitives::{ keccak256, stage::{ @@ -16,7 +16,7 @@ use reth_primitives::{ StageId, }, }; -use reth_provider::{AccountExtReader, DatabaseProviderRW, HashingWriter}; +use reth_provider::{AccountExtReader, DatabaseProviderRW, HashingWriter, StatsReader}; use std::{ cmp::max, fmt::Debug, @@ -65,8 +65,8 @@ impl Default for AccountHashingStage { pub struct SeedOpts { /// The range of blocks to be generated pub blocks: RangeInclusive, - /// The range of 
accounts to be generated - pub accounts: Range, + /// The number of accounts to be generated + pub accounts: usize, /// The range of transactions to be generated per block. pub txs: Range, } @@ -77,7 +77,7 @@ impl AccountHashingStage { /// at the target block, with `txs_range` transactions in each block. /// /// Proceeds to go to the `BlockTransitionIndex` end, go back `transitions` and change the - /// account state in the `AccountChangeSet` table. + /// account state in the `AccountChangeSets` table. pub fn seed( provider: &DatabaseProviderRW, opts: SeedOpts, @@ -85,19 +85,25 @@ impl AccountHashingStage { use reth_db::models::AccountBeforeTx; use reth_interfaces::test_utils::{ generators, - generators::{random_block_range, random_eoa_account_range}, + generators::{random_block_range, random_eoa_accounts}, }; use reth_primitives::{Account, B256, U256}; - use reth_provider::BlockWriter; + use reth_provider::providers::StaticFileWriter; let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, opts.blocks.clone(), B256::ZERO, opts.txs); for block in blocks { - provider.insert_block(block.try_seal_with_senders().unwrap(), None).unwrap(); + provider.insert_historical_block(block.try_seal_with_senders().unwrap(), None).unwrap(); } - let mut accounts = random_eoa_account_range(&mut rng, opts.accounts); + provider + .static_file_provider() + .latest_writer(reth_primitives::StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); + let mut accounts = random_eoa_accounts(&mut rng, opts.accounts); { // Account State generator let mut account_cursor = @@ -108,7 +114,7 @@ impl AccountHashingStage { } let mut acc_changeset_cursor = - provider.tx_ref().cursor_write::()?; + provider.tx_ref().cursor_write::()?; for (t, (addr, acc)) in (opts.blocks).zip(&accounts) { let Account { nonce, balance, .. } = acc; let prev_acc = Account { @@ -166,7 +172,7 @@ impl Stage for AccountHashingStage { } _ => { // clear table, load all accounts and hash it - tx.clear::()?; + tx.clear::()?; None } @@ -213,7 +219,7 @@ impl Stage for AccountHashingStage { hashed_batch.par_sort_unstable_by(|a, b| a.0.cmp(&b.0)); let mut hashed_account_cursor = - tx.cursor_write::>()?; + tx.cursor_write::>()?; // iterate and put presorted hashed accounts if start_address.is_none() { @@ -289,10 +295,10 @@ impl Stage for AccountHashingStage { fn stage_checkpoint_progress( provider: &DatabaseProviderRW, -) -> Result { +) -> ProviderResult { Ok(EntitiesCheckpoint { - processed: provider.tx_ref().entries::()? as u64, - total: provider.tx_ref().entries::()? as u64, + processed: provider.count_entries::()? as u64, + total: provider.count_entries::()? as u64, }) } @@ -300,10 +306,12 @@ fn stage_checkpoint_progress( mod tests { use super::*; use crate::test_utils::{ - stage_test_suite_ext, ExecuteStageTestRunner, TestRunnerError, UnwindStageTestRunner, + stage_test_suite_ext, ExecuteStageTestRunner, StageTestRunner, TestRunnerError, + UnwindStageTestRunner, }; use assert_matches::assert_matches; use reth_primitives::{stage::StageUnitCheckpoint, Account, U256}; + use reth_provider::providers::StaticFileWriter; use test_utils::*; stage_test_suite_ext!(AccountHashingTestRunner, account_hashing); @@ -400,7 +408,7 @@ mod tests { }) if address == fifth_address && total == runner.db.table::().unwrap().len() as u64 ); - assert_eq!(runner.db.table::().unwrap().len(), 5); + assert_eq!(runner.db.table::().unwrap().len(), 5); // second run, hash next five accounts. 
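Editor's note: the `stage_checkpoint_progress` helpers above now build an `EntitiesCheckpoint` from `count_entries` results (hashed entries processed versus plain-state entries in total). A small sketch of how such a checkpoint expresses completion, using a simplified struct and an illustrative `fraction` helper rather than the reth type:

// Simplified stand-in for an entities checkpoint: progress is the ratio of
// processed entries to total entries, treated as complete when total is zero.
#[derive(Debug, Clone, Copy)]
struct EntitiesCheckpoint {
    processed: u64,
    total: u64,
}

impl EntitiesCheckpoint {
    fn fraction(&self) -> f64 {
        if self.total == 0 {
            1.0
        } else {
            (self.processed as f64 / self.total as f64).min(1.0)
        }
    }
}

fn main() {
    let cp = EntitiesCheckpoint { processed: 5, total: 10 };
    assert!((cp.fraction() - 0.5).abs() < f64::EPSILON);
}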
input.checkpoint = Some(result.unwrap().checkpoint); @@ -427,7 +435,7 @@ mod tests { }) if processed == total && total == runner.db.table::().unwrap().len() as u64 ); - assert_eq!(runner.db.table::().unwrap().len(), 10); + assert_eq!(runner.db.table::().unwrap().len(), 10); // Validate the stage execution assert!(runner.validate_execution(input, result.ok()).is_ok(), "execution validation"); @@ -435,7 +443,7 @@ mod tests { mod test_utils { use super::*; - use crate::test_utils::{StageTestRunner, TestStageDB}; + use crate::test_utils::TestStageDB; use reth_primitives::Address; pub(crate) struct AccountHashingTestRunner { @@ -455,11 +463,11 @@ mod tests { } /// Iterates over PlainAccount table and checks that the accounts match the ones - /// in the HashedAccount table + /// in the HashedAccounts table pub(crate) fn check_hashed_accounts(&self) -> Result<(), TestRunnerError> { self.db.query(|tx| { let mut acc_cursor = tx.cursor_read::()?; - let mut hashed_acc_cursor = tx.cursor_read::()?; + let mut hashed_acc_cursor = tx.cursor_read::()?; while let Some((address, account)) = acc_cursor.next()? { let hashed_addr = keccak256(address); @@ -478,7 +486,7 @@ mod tests { pub(crate) fn check_old_hashed_accounts(&self) -> Result<(), TestRunnerError> { self.db.query(|tx| { let mut acc_cursor = tx.cursor_read::()?; - let mut hashed_acc_cursor = tx.cursor_read::()?; + let mut hashed_acc_cursor = tx.cursor_read::()?; while let Some((address, account)) = acc_cursor.next()? { let Account { nonce, balance, .. } = account; @@ -527,7 +535,7 @@ mod tests { let provider = self.db.factory.provider_rw()?; let res = Ok(AccountHashingStage::seed( &provider, - SeedOpts { blocks: 1..=input.target(), accounts: 0..10, txs: 0..3 }, + SeedOpts { blocks: 1..=input.target(), accounts: 10, txs: 0..3 }, ) .unwrap()); provider.commit().expect("failed to commit"); diff --git a/crates/stages/src/stages/hashing_storage.rs b/crates/stages/src/stages/hashing_storage.rs index b4f8f3582f06..c2a18df29940 100644 --- a/crates/stages/src/stages/hashing_storage.rs +++ b/crates/stages/src/stages/hashing_storage.rs @@ -7,7 +7,7 @@ use reth_db::{ tables, transaction::{DbTx, DbTxMut}, }; -use reth_interfaces::db::DatabaseError; +use reth_interfaces::provider::ProviderResult; use reth_primitives::{ keccak256, stage::{ @@ -16,7 +16,7 @@ use reth_primitives::{ }, StorageEntry, }; -use reth_provider::{DatabaseProviderRW, HashingWriter, StorageReader}; +use reth_provider::{DatabaseProviderRW, HashingWriter, StatsReader, StorageReader}; use std::{collections::BTreeMap, fmt::Debug}; use tracing::*; @@ -90,7 +90,7 @@ impl Stage for StorageHashingStage { } _ => { // clear table, load all accounts and hash it - tx.clear::()?; + tx.clear::()?; (None, None) } @@ -152,7 +152,7 @@ impl Stage for StorageHashingStage { // iterate and put presorted hashed slots hashed_batch.into_iter().try_for_each(|((addr, key), value)| { - tx.put::(addr, StorageEntry { key, value }) + tx.put::(addr, StorageEntry { key, value }) })?; if current_key.is_some() { @@ -214,10 +214,10 @@ impl Stage for StorageHashingStage { fn stage_checkpoint_progress( provider: &DatabaseProviderRW, -) -> Result { +) -> ProviderResult { Ok(EntitiesCheckpoint { - processed: provider.tx_ref().entries::()? as u64, - total: provider.tx_ref().entries::()? as u64, + processed: provider.count_entries::()? as u64, + total: provider.count_entries::()? 
as u64, }) } @@ -239,6 +239,7 @@ mod tests { generators::{random_block_range, random_contract_account_range}, }; use reth_primitives::{stage::StageUnitCheckpoint, Address, SealedBlock, B256, U256}; + use reth_provider::providers::StaticFileWriter; stage_test_suite_ext!(StorageHashingTestRunner, storage_hashing); @@ -363,7 +364,7 @@ mod tests { }) if address == progress_address && storage == progress_key && total == runner.db.table::().unwrap().len() as u64 ); - assert_eq!(runner.db.table::().unwrap().len(), 500); + assert_eq!(runner.db.table::().unwrap().len(), 500); // second run with commit threshold of 2 to check if subkey is set. runner.set_commit_threshold(2); @@ -409,7 +410,7 @@ mod tests { }) if address == progress_address && storage == progress_key && total == runner.db.table::().unwrap().len() as u64 ); - assert_eq!(runner.db.table::().unwrap().len(), 502); + assert_eq!(runner.db.table::().unwrap().len(), 502); // third last run, hash rest of storages. runner.set_commit_threshold(1000); @@ -442,7 +443,7 @@ mod tests { total == runner.db.table::().unwrap().len() as u64 ); assert_eq!( - runner.db.table::().unwrap().len(), + runner.db.table::().unwrap().len(), runner.db.table::().unwrap().len() ); @@ -501,7 +502,10 @@ mod tests { self.db.commit(|tx| { progress.body.iter().try_for_each( |transaction| -> Result<(), reth_db::DatabaseError> { - tx.put::(transaction.hash(), next_tx_num)?; + tx.put::( + transaction.hash(), + next_tx_num, + )?; tx.put::( next_tx_num, transaction.clone().into(), @@ -594,7 +598,7 @@ mod tests { .query(|tx| { let mut storage_cursor = tx.cursor_dup_read::()?; let mut hashed_storage_cursor = - tx.cursor_dup_read::()?; + tx.cursor_dup_read::()?; let mut expected = 0; @@ -609,7 +613,7 @@ mod tests { ); expected += 1; } - let count = tx.cursor_dup_read::()?.walk(None)?.count(); + let count = tx.cursor_dup_read::()?.walk(None)?.count(); assert_eq!(count, expected); Ok(()) @@ -641,18 +645,18 @@ mod tests { let hashed_entry = StorageEntry { key: keccak256(entry.key), value: entry.value }; if let Some(e) = tx - .cursor_dup_write::()? + .cursor_dup_write::()? .seek_by_key_subkey(hashed_address, hashed_entry.key)? 
.filter(|e| e.key == hashed_entry.key) { - tx.delete::(hashed_address, Some(e)) + tx.delete::(hashed_address, Some(e)) .expect("failed to delete entry"); } - tx.put::(hashed_address, hashed_entry)?; + tx.put::(hashed_address, hashed_entry)?; } - tx.put::(bn_address, prev_entry)?; + tx.put::(bn_address, prev_entry)?; Ok(()) } @@ -661,7 +665,7 @@ mod tests { let target_block = input.unwind_to; self.db.commit(|tx| { let mut storage_cursor = tx.cursor_dup_write::()?; - let mut changeset_cursor = tx.cursor_dup_read::()?; + let mut changeset_cursor = tx.cursor_dup_read::()?; let mut rev_changeset_walker = changeset_cursor.walk_back(None)?; diff --git a/crates/stages/src/stages/headers.rs b/crates/stages/src/stages/headers.rs index bc0dc05ace50..d34ffa46ba8e 100644 --- a/crates/stages/src/stages/headers.rs +++ b/crates/stages/src/stages/headers.rs @@ -1,12 +1,16 @@ -use crate::{ExecInput, ExecOutput, Stage, StageError, UnwindInput, UnwindOutput}; +use crate::{BlockErrorKind, ExecInput, ExecOutput, Stage, StageError, UnwindInput, UnwindOutput}; use futures_util::StreamExt; +use reth_codecs::Compact; use reth_db::{ cursor::{DbCursorRO, DbCursorRW}, database::Database, tables, - transaction::{DbTx, DbTxMut}, + transaction::DbTxMut, + RawKey, RawTable, RawValue, }; +use reth_etl::Collector; use reth_interfaces::{ + consensus::Consensus, p2p::headers::{downloader::HeaderDownloader, error::HeadersDownloaderError}, provider::ProviderError, }; @@ -14,10 +18,18 @@ use reth_primitives::{ stage::{ CheckpointBlockRange, EntitiesCheckpoint, HeadersCheckpoint, StageCheckpoint, StageId, }, - BlockHashOrNumber, BlockNumber, SealedHeader, + BlockHash, BlockNumber, SealedHeader, StaticFileSegment, }; -use reth_provider::{DatabaseProviderRW, HeaderSyncGap, HeaderSyncGapProvider, HeaderSyncMode}; -use std::task::{ready, Context, Poll}; +use reth_provider::{ + providers::{StaticFileProvider, StaticFileWriter}, + BlockHashReader, DatabaseProviderRW, HeaderProvider, HeaderSyncGap, HeaderSyncGapProvider, + HeaderSyncMode, +}; +use std::{ + sync::Arc, + task::{ready, Context, Poll}, +}; +use tempfile::TempDir; use tracing::*; /// The headers stage. @@ -41,10 +53,16 @@ pub struct HeaderStage { downloader: Downloader, /// The sync mode for the stage. mode: HeaderSyncMode, + /// Consensus client implementation + consensus: Arc, /// Current sync gap. sync_gap: Option, - /// Header buffer. - buffer: Option>, + /// ETL collector with HeaderHash -> BlockNumber + hash_collector: Collector, + /// ETL collector with BlockNumber -> SealedHeader + header_collector: Collector, + /// Returns true if the ETL collector has all necessary headers to fill the gap. + is_etl_ready: bool, } // === impl HeaderStage === @@ -54,56 +72,121 @@ where Downloader: HeaderDownloader, { /// Create a new header stage - pub fn new(database: Provider, downloader: Downloader, mode: HeaderSyncMode) -> Self { - Self { provider: database, downloader, mode, sync_gap: None, buffer: None } - } - - fn is_stage_done( - &self, - tx: &::TXMut, - checkpoint: u64, - ) -> Result { - let mut header_cursor = tx.cursor_read::()?; - let (head_num, _) = header_cursor - .seek_exact(checkpoint)? 
- .ok_or_else(|| ProviderError::HeaderNotFound(checkpoint.into()))?; - // Check if the next entry is congruent - Ok(header_cursor.next()?.map(|(next_num, _)| head_num + 1 == next_num).unwrap_or_default()) + pub fn new( + database: Provider, + downloader: Downloader, + mode: HeaderSyncMode, + consensus: Arc, + tempdir: Arc, + ) -> Self { + Self { + provider: database, + downloader, + mode, + consensus, + sync_gap: None, + hash_collector: Collector::new(tempdir.clone(), 100 * (1024 * 1024)), + header_collector: Collector::new(tempdir, 100 * (1024 * 1024)), + is_etl_ready: false, + } } - /// Write downloaded headers to the given transaction + /// Write downloaded headers to the given transaction from ETL. /// - /// Note: this writes the headers with rising block numbers. + /// Writes to the following tables: + /// [`tables::Headers`], [`tables::CanonicalHeaders`], [`tables::HeaderTerminalDifficulties`] + /// and [`tables::HeaderNumbers`]. fn write_headers( - &self, + &mut self, tx: &::TXMut, - headers: Vec, - ) -> Result, StageError> { - trace!(target: "sync::stages::headers", len = headers.len(), "writing headers"); - - let mut cursor_header = tx.cursor_write::()?; - let mut cursor_canonical = tx.cursor_write::()?; + static_file_provider: StaticFileProvider, + ) -> Result { + let total_headers = self.header_collector.len(); + + info!(target: "sync::stages::headers", total = total_headers, "Writing headers"); + + // Consistency check of expected headers in static files vs DB is done on provider::sync_gap + // when poll_execute_ready is polled. + let mut last_header_number = static_file_provider + .get_highest_static_file_block(StaticFileSegment::Headers) + .unwrap_or_default(); + + // Find the latest total difficulty + let mut td = static_file_provider + .header_td_by_number(last_header_number)? + .ok_or(ProviderError::TotalDifficultyNotFound(last_header_number))?; + + // Although headers were downloaded in reverse order, the collector iterates it in ascending + // order + let mut writer = static_file_provider.latest_writer(StaticFileSegment::Headers)?; + let interval = (total_headers / 10).max(1); + for (index, header) in self.header_collector.iter()?.enumerate() { + let (_, header_buf) = header?; + + if index > 0 && index % interval == 0 { + info!(target: "sync::stages::headers", progress = %format!("{:.2}%", (index as f64 / total_headers as f64) * 100.0), "Writing headers"); + } - let mut latest = None; - // Since the headers were returned in descending order, - // iterate them in the reverse order - for header in headers.into_iter().rev() { + let (sealed_header, _) = SealedHeader::from_compact(&header_buf, header_buf.len()); + let (header, header_hash) = sealed_header.split(); if header.number == 0 { continue } + last_header_number = header.number; - let header_hash = header.hash(); - let header_number = header.number; - let header = header.unseal(); - latest = Some(header.number); + // Increase total difficulty + td += header.difficulty; + + // Header validation + self.consensus.validate_header_with_total_difficulty(&header, td).map_err(|error| { + StageError::Block { + block: Box::new(header.clone().seal(header_hash)), + error: BlockErrorKind::Validation(error), + } + })?; - // NOTE: HeaderNumbers are not sorted and can't be inserted with cursor. 
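Editor's note: the rewritten header write path above relies on the ETL pattern: entries are buffered in whatever order they arrive (the downloader delivers headers newest-first), then read back sorted by key so they can be appended in ascending block order while total difficulty is accumulated. A minimal in-memory sketch of that idea, assuming a simplified `Collector` (the real `reth_etl::Collector` additionally spills sorted batches to temporary files to keep memory bounded):

// Minimal, in-memory stand-in for an ETL collector: insert in any order,
// iterate back sorted by key. Everything is kept in a Vec here for clarity.
struct Collector<K: Ord, V> {
    entries: Vec<(K, V)>,
}

impl<K: Ord, V> Collector<K, V> {
    fn new() -> Self {
        Self { entries: Vec::new() }
    }

    fn insert(&mut self, key: K, value: V) {
        self.entries.push((key, value));
    }

    fn iter_sorted(mut self) -> impl Iterator<Item = (K, V)> {
        self.entries.sort_by(|a, b| a.0.cmp(&b.0));
        self.entries.into_iter()
    }
}

fn main() {
    // Headers are collected in descending block order...
    let mut headers = Collector::new();
    for number in (1u64..=3).rev() {
        headers.insert(number, format!("header {number}"));
    }
    // ...but written back in ascending order, e.g. while accumulating TD.
    let mut td = 0u64;
    for (number, _header) in headers.iter_sorted() {
        td += number; // placeholder for `td += header.difficulty`
        println!("append block {number}, cumulative td {td}");
    }
}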
- tx.put::(header_hash, header_number)?; - cursor_header.insert(header_number, header)?; - cursor_canonical.insert(header_number, header_hash)?; + // Append to Headers segment + writer.append_header(header, td, header_hash)?; } - Ok(latest) + info!(target: "sync::stages::headers", total = total_headers, "Writing header hash index"); + + let mut cursor_header_numbers = tx.cursor_write::>()?; + let mut first_sync = false; + + // If we only have the genesis block hash, then we are at first sync, and we can remove it, + // add it to the collector and use tx.append on all hashes. + if let Some((hash, block_number)) = cursor_header_numbers.last()? { + if block_number.value()? == 0 { + self.hash_collector.insert(hash.key()?, 0); + cursor_header_numbers.delete_current()?; + first_sync = true; + } + } + + // Since ETL sorts all entries by hashes, we are either appending (first sync) or inserting + // in order (further syncs). + for (index, hash_to_number) in self.hash_collector.iter()?.enumerate() { + let (hash, number) = hash_to_number?; + + if index > 0 && index % interval == 0 { + info!(target: "sync::stages::headers", progress = %format!("{:.2}%", (index as f64 / total_headers as f64) * 100.0), "Writing headers hash index"); + } + + if first_sync { + cursor_header_numbers.append( + RawKey::::from_vec(hash), + RawValue::::from_vec(number), + )?; + } else { + cursor_header_numbers.insert( + RawKey::::from_vec(hash), + RawValue::::from_vec(number), + )?; + } + } + + Ok(last_header_number) } } @@ -125,14 +208,8 @@ where ) -> Poll> { let current_checkpoint = input.checkpoint(); - // Return if buffer already has some items. - if self.buffer.is_some() { - // TODO: review - trace!( - target: "sync::stages::headers", - checkpoint = %current_checkpoint.block_number, - "Buffer is not empty" - ); + // Return if stage has already completed the gap on the ETL files + if self.is_etl_ready { return Poll::Ready(Ok(())) } @@ -149,27 +226,42 @@ where target = ?tip, "Target block already reached" ); + self.is_etl_ready = true; return Poll::Ready(Ok(())) } debug!(target: "sync::stages::headers", ?tip, head = ?gap.local_head.hash(), "Commencing sync"); + let local_head_number = gap.local_head.number; // let the downloader know what to sync - self.downloader.update_sync_gap(gap.local_head, gap.target); - - let result = match ready!(self.downloader.poll_next_unpin(cx)) { - Some(Ok(headers)) => { - info!(target: "sync::stages::headers", len = headers.len(), "Received headers"); - self.buffer = Some(headers); - Ok(()) - } - Some(Err(HeadersDownloaderError::DetachedHead { local_head, header, error })) => { - error!(target: "sync::stages::headers", %error, "Cannot attach header to head"); - Err(StageError::DetachedHead { local_head, header, error }) + self.downloader.update_sync_gap(gap.local_head, gap.target.clone()); + + // We only want to stop once we have all the headers on ETL filespace (disk). + loop { + match ready!(self.downloader.poll_next_unpin(cx)) { + Some(Ok(headers)) => { + info!(target: "sync::stages::headers", total = headers.len(), from_block = headers.first().map(|h| h.number), to_block = headers.last().map(|h| h.number), "Received headers"); + for header in headers { + let header_number = header.number; + + self.hash_collector.insert(header.hash(), header_number); + self.header_collector.insert(header_number, header); + + // Headers are downloaded in reverse, so if we reach here, we know we have + // filled the gap. 
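Editor's note: because the downloader streams headers from the tip downwards, the stage can stop polling as soon as it sees the block directly above the local head; at that point the ETL buffers cover the whole gap, which is exactly what the comment above describes. A hedged sketch of that termination check, with plain integers standing in for headers:

// Illustrative gap-fill check: batches arrive in descending block order and
// collection stops once the block right above the local head is seen.
fn collect_until_gap_filled(local_head: u64, batches: &[Vec<u64>]) -> Vec<u64> {
    let mut collected = Vec::new();
    for batch in batches {
        for &number in batch {
            collected.push(number);
            if number == local_head + 1 {
                // Everything from the tip down to local_head + 1 is buffered.
                return collected;
            }
        }
    }
    collected
}

fn main() {
    // Local head is 10; tip is 15; batches come newest-first.
    let batches = vec![vec![15, 14, 13], vec![12, 11, 10, 9]];
    let collected = collect_until_gap_filled(10, &batches);
    assert_eq!(collected, vec![15, 14, 13, 12, 11]);
}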
+ if header_number == local_head_number + 1 { + self.is_etl_ready = true; + return Poll::Ready(Ok(())) + } + } + } + Some(Err(HeadersDownloaderError::DetachedHead { local_head, header, error })) => { + error!(target: "sync::stages::headers", %error, "Cannot attach header to head"); + return Poll::Ready(Err(StageError::DetachedHead { local_head, header, error })) + } + None => return Poll::Ready(Err(StageError::ChannelClosed)), } - None => Err(StageError::ChannelClosed), - }; - Poll::Ready(result) + } } /// Download the headers in reverse order (falling block numbers) @@ -181,99 +273,41 @@ where ) -> Result { let current_checkpoint = input.checkpoint(); - let gap = self.sync_gap.clone().ok_or(StageError::MissingSyncGap)?; - if gap.is_closed() { + if self.sync_gap.as_ref().ok_or(StageError::MissingSyncGap)?.is_closed() { + self.is_etl_ready = false; return Ok(ExecOutput::done(current_checkpoint)) } - let local_head = gap.local_head.number; - let tip = gap.target.tip(); + // We should be here only after we have downloaded all headers into the disk buffer (ETL). + if !self.is_etl_ready { + return Err(StageError::MissingDownloadBuffer) + } - let downloaded_headers = self.buffer.take().ok_or(StageError::MissingDownloadBuffer)?; - let tip_block_number = match tip { - // If tip is hash and it equals to the first downloaded header's hash, we can use - // the block number of this header as tip. - BlockHashOrNumber::Hash(hash) => downloaded_headers - .first() - .and_then(|header| (header.hash() == hash).then_some(header.number)), - // If tip is number, we can just grab it and not resolve using downloaded headers. - BlockHashOrNumber::Number(number) => Some(number), - }; + // Reset flag + self.is_etl_ready = false; - // Since we're syncing headers in batches, gap tip will move in reverse direction towards - // our local head with every iteration. To get the actual target block number we're - // syncing towards, we need to take into account already synced headers from the database. - // It is `None`, if tip didn't change and we're still downloading headers for previously - // calculated gap. - let tx = provider.tx_ref(); - let target_block_number = if let Some(tip_block_number) = tip_block_number { - let local_max_block_number = tx - .cursor_read::()? - .last()? - .map(|(canonical_block, _)| canonical_block); - - Some(tip_block_number.max(local_max_block_number.unwrap_or_default())) - } else { - None - }; + // Write the headers and related tables to DB from ETL space + let to_be_processed = self.hash_collector.len() as u64; + let last_header_number = + self.write_headers::(provider.tx_ref(), provider.static_file_provider().clone())?; - let mut stage_checkpoint = match current_checkpoint.headers_stage_checkpoint() { - // If checkpoint block range matches our range, we take the previously used - // stage checkpoint as-is. - Some(stage_checkpoint) - if stage_checkpoint.block_range.from == input.checkpoint().block_number => - { - stage_checkpoint - } - // Otherwise, we're on the first iteration of new gap sync, so we recalculate the number - // of already processed and total headers. - // `target_block_number` is guaranteed to be `Some`, because on the first iteration - // we download the header for missing tip and use its block number. 
- _ => { - let target = target_block_number.expect("No downloaded header for tip found"); + Ok(ExecOutput { + checkpoint: StageCheckpoint::new(last_header_number).with_headers_stage_checkpoint( HeadersCheckpoint { block_range: CheckpointBlockRange { from: input.checkpoint().block_number, - to: target, + to: last_header_number, }, progress: EntitiesCheckpoint { - // Set processed to the local head block number + number - // of block already filled in the gap. - processed: local_head + (target - tip_block_number.unwrap_or_default()), - total: target, + processed: input.checkpoint().block_number + to_be_processed, + total: last_header_number, }, - } - } - }; - - // Total headers can be updated if we received new tip from the network, and need to fill - // the local gap. - if let Some(target_block_number) = target_block_number { - stage_checkpoint.progress.total = target_block_number; - } - stage_checkpoint.progress.processed += downloaded_headers.len() as u64; - - // Write the headers to db - self.write_headers::(tx, downloaded_headers)?.unwrap_or_default(); - - if self.is_stage_done::(tx, current_checkpoint.block_number)? { - let checkpoint = current_checkpoint.block_number.max( - tx.cursor_read::()? - .last()? - .map(|(num, _)| num) - .unwrap_or_default(), - ); - Ok(ExecOutput { - checkpoint: StageCheckpoint::new(checkpoint) - .with_headers_stage_checkpoint(stage_checkpoint), - done: true, - }) - } else { - Ok(ExecOutput { - checkpoint: current_checkpoint.with_headers_stage_checkpoint(stage_checkpoint), - done: false, - }) - } + }, + ), + // We only reach here if all headers have been downloaded by ETL, and pushed to DB all + // in one stage run. + done: true, + }) } /// Unwind the stage. @@ -282,23 +316,30 @@ where provider: &DatabaseProviderRW, input: UnwindInput, ) -> Result { - self.buffer.take(); self.sync_gap.take(); - provider.unwind_table_by_walker::( - input.unwind_to + 1, - )?; - provider.unwind_table_by_num::(input.unwind_to)?; - let unwound_headers = provider.unwind_table_by_num::(input.unwind_to)?; + let static_file_provider = provider.static_file_provider(); + let highest_block = static_file_provider + .get_highest_static_file_block(StaticFileSegment::Headers) + .unwrap_or_default(); + let unwound_headers = highest_block - input.unwind_to; + + for block in (input.unwind_to + 1)..=highest_block { + let header_hash = static_file_provider + .block_hash(block)? 
+ .ok_or(ProviderError::HeaderNotFound(block.into()))?; + + provider.tx_ref().delete::(header_hash, None)?; + } + + let mut writer = static_file_provider.latest_writer(StaticFileSegment::Headers)?; + writer.prune_headers(unwound_headers)?; let stage_checkpoint = input.checkpoint.headers_stage_checkpoint().map(|stage_checkpoint| HeadersCheckpoint { block_range: stage_checkpoint.block_range, progress: EntitiesCheckpoint { - processed: stage_checkpoint - .progress - .processed - .saturating_sub(unwound_headers as u64), + processed: stage_checkpoint.progress.processed.saturating_sub(unwound_headers), total: stage_checkpoint.progress.total, }, }); @@ -335,9 +376,7 @@ mod tests { generators, generators::random_header_range, TestConsensus, TestHeaderDownloader, TestHeadersClient, }; - use reth_primitives::U256; - use reth_provider::{BlockHashReader, BlockNumReader, HeaderProvider}; - use std::sync::Arc; + use reth_provider::BlockNumReader; use tokio::sync::watch; pub(crate) struct HeadersTestRunner { @@ -345,6 +384,7 @@ mod tests { channel: (watch::Sender, watch::Receiver), downloader_factory: Box D + Send + Sync + 'static>, db: TestStageDB, + consensus: Arc, } impl Default for HeadersTestRunner { @@ -353,6 +393,7 @@ mod tests { Self { client: client.clone(), channel: watch::channel(B256::ZERO), + consensus: Arc::new(TestConsensus::default()), downloader_factory: Box::new(move || { TestHeaderDownloader::new( client.clone(), @@ -378,6 +419,8 @@ mod tests { self.db.factory.clone(), (*self.downloader_factory)(), HeaderSyncMode::Tip(self.channel.1.clone()), + self.consensus.clone(), + Arc::new(TempDir::new().unwrap()), ) } } @@ -388,11 +431,9 @@ mod tests { fn seed_execution(&mut self, input: ExecInput) -> Result { let mut rng = generators::rng(); let start = input.checkpoint().block_number; - let head = random_header(&mut rng, start, None); - self.db.insert_headers(std::iter::once(&head))?; - // patch td table for `update_head` call - self.db - .commit(|tx| Ok(tx.put::(head.number, U256::ZERO.into())?))?; + let headers = random_header_range(&mut rng, 0..start + 1, B256::ZERO); + let head = headers.last().cloned().unwrap(); + self.db.insert_headers_with_td(headers.iter())?; // use previous checkpoint as seed size let end = input.target.unwrap_or_default() + 1; @@ -416,8 +457,11 @@ mod tests { match output { Some(output) if output.checkpoint.block_number > initial_checkpoint => { let provider = self.db.factory.provider()?; - for block_num in (initial_checkpoint..output.checkpoint.block_number).rev() - { + let mut td = provider + .header_td_by_number(initial_checkpoint.saturating_sub(1))? 
+ .unwrap_or_default(); + + for block_num in initial_checkpoint..output.checkpoint.block_number { // look up the header hash let hash = provider.block_hash(block_num)?.expect("no header hash"); @@ -429,6 +473,13 @@ mod tests { assert!(header.is_some()); let header = header.unwrap().seal_slow(); assert_eq!(header.hash(), hash); + + // validate the header total difficulty + td += header.difficulty; + assert_eq!( + provider.header_td_by_number(block_num)?.map(Into::into), + Some(td) + ); } } _ => self.check_no_header_entry_above(initial_checkpoint)?, @@ -468,6 +519,7 @@ mod tests { .build(client.clone(), Arc::new(TestConsensus::default())) }), db: TestStageDB::default(), + consensus: Arc::new(TestConsensus::default()), } } } @@ -481,6 +533,10 @@ mod tests { .ensure_no_entry_above_by_value::(block, |val| val)?; self.db.ensure_no_entry_above::(block, |key| key)?; self.db.ensure_no_entry_above::(block, |key| key)?; + self.db.ensure_no_entry_above::( + block, + |num| num, + )?; Ok(()) } @@ -511,6 +567,7 @@ mod tests { runner.send_tip(tip.hash()); let result = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); assert_matches!(result, Ok(ExecOutput { checkpoint: StageCheckpoint { block_number, stage_checkpoint: Some(StageUnitCheckpoint::Headers(HeadersCheckpoint { @@ -526,69 +583,8 @@ mod tests { }, done: true }) if block_number == tip.number && from == checkpoint && to == previous_stage && // -1 because we don't need to download the local head - processed == checkpoint + headers.len() as u64 - 1 && total == tip.number); - assert!(runner.validate_execution(input, result.ok()).is_ok(), "validation failed"); - } - - /// Execute the stage in two steps - #[tokio::test] - async fn execute_from_previous_checkpoint() { - let mut runner = HeadersTestRunner::with_linear_downloader(); - // pick range that's larger than the configured headers batch size - let (checkpoint, previous_stage) = (600, 1200); - let mut input = ExecInput { - target: Some(previous_stage), - checkpoint: Some(StageCheckpoint::new(checkpoint)), - }; - let headers = runner.seed_execution(input).expect("failed to seed execution"); - let rx = runner.execute(input); - - runner.client.extend(headers.iter().rev().map(|h| h.clone().unseal())).await; - - // skip `after_execution` hook for linear downloader - let tip = headers.last().unwrap(); - runner.send_tip(tip.hash()); - - let result = rx.await.unwrap(); - assert_matches!(result, Ok(ExecOutput { checkpoint: StageCheckpoint { - block_number, - stage_checkpoint: Some(StageUnitCheckpoint::Headers(HeadersCheckpoint { - block_range: CheckpointBlockRange { - from, - to - }, - progress: EntitiesCheckpoint { - processed, - total, - } - })) - }, done: false }) if block_number == checkpoint && - from == checkpoint && to == previous_stage && - processed == checkpoint + 500 && total == tip.number); - - runner.client.clear().await; - runner.client.extend(headers.iter().take(101).map(|h| h.clone().unseal()).rev()).await; - input.checkpoint = Some(result.unwrap().checkpoint); - - let rx = runner.execute(input); - let result = rx.await.unwrap(); - - assert_matches!(result, Ok(ExecOutput { checkpoint: StageCheckpoint { - block_number, - stage_checkpoint: Some(StageUnitCheckpoint::Headers(HeadersCheckpoint { - block_range: CheckpointBlockRange { - from, - to - }, - progress: EntitiesCheckpoint { - processed, - total, - } - })) - }, done: true }) if block_number == tip.number && - from == checkpoint && to == previous_stage && - // -1 because we don't need to download the local 
head - processed == checkpoint + headers.len() as u64 - 1 && total == tip.number); + processed == checkpoint + headers.len() as u64 - 1 && total == tip.number + ); assert!(runner.validate_execution(input, result.ok()).is_ok(), "validation failed"); } } diff --git a/crates/stages/src/stages/index_account_history.rs b/crates/stages/src/stages/index_account_history.rs index 045bfe04e8de..dab5eb3218ad 100644 --- a/crates/stages/src/stages/index_account_history.rs +++ b/crates/stages/src/stages/index_account_history.rs @@ -2,7 +2,7 @@ use crate::{ExecInput, ExecOutput, Stage, StageError, UnwindInput, UnwindOutput} use reth_db::database::Database; use reth_primitives::{ stage::{StageCheckpoint, StageId}, - PruneCheckpoint, PruneMode, PruneSegment, + PruneCheckpoint, PruneMode, PrunePurpose, PruneSegment, }; use reth_provider::{ AccountExtReader, DatabaseProviderRW, HistoryWriter, PruneCheckpointReader, @@ -12,7 +12,7 @@ use std::fmt::Debug; /// Stage is indexing history the account changesets generated in /// [`ExecutionStage`][crate::stages::ExecutionStage]. For more information -/// on index sharding take a look at [`reth_db::tables::AccountHistory`] +/// on index sharding take a look at [`reth_db::tables::AccountsHistory`] #[derive(Debug)] pub struct IndexAccountHistoryStage { /// Number of blocks after which the control @@ -49,7 +49,13 @@ impl Stage for IndexAccountHistoryStage { ) -> Result { if let Some((target_prunable_block, prune_mode)) = self .prune_mode - .map(|mode| mode.prune_target_block(input.target(), PruneSegment::AccountHistory)) + .map(|mode| { + mode.prune_target_block( + input.target(), + PruneSegment::AccountHistory, + PrunePurpose::User, + ) + }) .transpose()? .flatten() { @@ -123,10 +129,14 @@ mod tests { generators::{random_block_range, random_changeset_range, random_contract_account_range}, }; use reth_primitives::{address, Address, BlockNumber, B256}; + use reth_provider::providers::StaticFileWriter; use std::collections::BTreeMap; const ADDRESS: Address = address!("0000000000000000000000000000000000000001"); + const LAST_BLOCK_IN_FULL_SHARD: BlockNumber = NUM_OF_INDICES_IN_SHARD as BlockNumber; + const MAX_BLOCK: BlockNumber = NUM_OF_INDICES_IN_SHARD as BlockNumber + 2; + fn acc() -> AccountBeforeTx { AccountBeforeTx { address: ADDRESS, info: None } } @@ -136,17 +146,17 @@ mod tests { ShardedKey { key: ADDRESS, highest_block_number: shard_index } } - fn list(list: &[usize]) -> BlockNumberList { + fn list(list: &[u64]) -> BlockNumberList { BlockNumberList::new(list).unwrap() } fn cast( table: Vec<(ShardedKey
, BlockNumberList)>, - ) -> BTreeMap, Vec> { + ) -> BTreeMap, Vec> { table .into_iter() .map(|(k, v)| { - let v = v.iter(0).collect(); + let v = v.iter().collect(); (k, v) }) .collect() @@ -155,33 +165,29 @@ mod tests { fn partial_setup(db: &TestStageDB) { // setup db.commit(|tx| { - // we just need first and last - tx.put::( - 0, - StoredBlockBodyIndices { tx_count: 3, ..Default::default() }, - ) - .unwrap(); - - tx.put::( - 5, - StoredBlockBodyIndices { tx_count: 5, ..Default::default() }, - ) - .unwrap(); - - // setup changeset that are going to be applied to history index - tx.put::(4, acc()).unwrap(); - tx.put::(5, acc()).unwrap(); + for block in 0..=MAX_BLOCK { + tx.put::( + block, + StoredBlockBodyIndices { tx_count: 3, ..Default::default() }, + )?; + // setup changeset that is going to be applied to history index + tx.put::(block, acc())?; + } Ok(()) }) .unwrap() } - fn run(db: &TestStageDB, run_to: u64) { - let input = ExecInput { target: Some(run_to), ..Default::default() }; + fn run(db: &TestStageDB, run_to: u64, input_checkpoint: Option) { + let input = ExecInput { + target: Some(run_to), + checkpoint: input_checkpoint + .map(|block_number| StageCheckpoint { block_number, stage_checkpoint: None }), + }; let mut stage = IndexAccountHistoryStage::default(); let provider = db.factory.provider_rw().unwrap(); let out = stage.execute(&provider, input).unwrap(); - assert_eq!(out, ExecOutput { checkpoint: StageCheckpoint::new(5), done: true }); + assert_eq!(out, ExecOutput { checkpoint: StageCheckpoint::new(run_to), done: true }); provider.commit().unwrap(); } @@ -207,17 +213,17 @@ mod tests { partial_setup(&db); // run - run(&db, 5); + run(&db, 3, None); // verify - let table = cast(db.table::().unwrap()); - assert_eq!(table, BTreeMap::from([(shard(u64::MAX), vec![4, 5])])); + let table = cast(db.table::().unwrap()); + assert_eq!(table, BTreeMap::from([(shard(u64::MAX), vec![1, 2, 3])])); // unwind - unwind(&db, 5, 0); + unwind(&db, 3, 0); // verify initial state - let table = db.table::().unwrap(); + let table = db.table::().unwrap(); assert!(table.is_empty()); } @@ -229,55 +235,59 @@ mod tests { // setup partial_setup(&db); db.commit(|tx| { - tx.put::(shard(u64::MAX), list(&[1, 2, 3])).unwrap(); + tx.put::(shard(u64::MAX), list(&[1, 2, 3])).unwrap(); Ok(()) }) .unwrap(); // run - run(&db, 5); + run(&db, 5, Some(3)); // verify - let table = cast(db.table::().unwrap()); - assert_eq!(table, BTreeMap::from([(shard(u64::MAX), vec![1, 2, 3, 4, 5]),])); + let table = cast(db.table::().unwrap()); + assert_eq!(table, BTreeMap::from([(shard(u64::MAX), vec![1, 2, 3, 4, 5])])); // unwind - unwind(&db, 5, 0); + unwind(&db, 5, 3); // verify initial state - let table = cast(db.table::().unwrap()); - assert_eq!(table, BTreeMap::from([(shard(u64::MAX), vec![1, 2, 3]),])); + let table = cast(db.table::().unwrap()); + assert_eq!(table, BTreeMap::from([(shard(u64::MAX), vec![1, 2, 3])])); } #[tokio::test] async fn insert_index_to_full_shard() { // init let db = TestStageDB::default(); - let full_list = vec![3; NUM_OF_INDICES_IN_SHARD]; + let full_list = (1..=LAST_BLOCK_IN_FULL_SHARD).collect::>(); + assert_eq!(full_list.len(), NUM_OF_INDICES_IN_SHARD); // setup partial_setup(&db); db.commit(|tx| { - tx.put::(shard(u64::MAX), list(&full_list)).unwrap(); + tx.put::(shard(u64::MAX), list(&full_list)).unwrap(); Ok(()) }) .unwrap(); // run - run(&db, 5); + run(&db, LAST_BLOCK_IN_FULL_SHARD + 2, Some(LAST_BLOCK_IN_FULL_SHARD)); // verify - let table = cast(db.table::().unwrap()); + let table = 
cast(db.table::().unwrap()); assert_eq!( table, - BTreeMap::from([(shard(3), full_list.clone()), (shard(u64::MAX), vec![4, 5])]) + BTreeMap::from([ + (shard(LAST_BLOCK_IN_FULL_SHARD), full_list.clone()), + (shard(u64::MAX), vec![LAST_BLOCK_IN_FULL_SHARD + 1, LAST_BLOCK_IN_FULL_SHARD + 2]) + ]) ); // unwind - unwind(&db, 5, 0); + unwind(&db, LAST_BLOCK_IN_FULL_SHARD + 2, LAST_BLOCK_IN_FULL_SHARD); // verify initial state - let table = cast(db.table::().unwrap()); + let table = cast(db.table::().unwrap()); assert_eq!(table, BTreeMap::from([(shard(u64::MAX), full_list)])); } @@ -285,33 +295,33 @@ mod tests { async fn insert_index_to_fill_shard() { // init let db = TestStageDB::default(); - let mut close_full_list = vec![1; NUM_OF_INDICES_IN_SHARD - 2]; + let mut almost_full_list = (1..=LAST_BLOCK_IN_FULL_SHARD - 2).collect::>(); // setup partial_setup(&db); db.commit(|tx| { - tx.put::(shard(u64::MAX), list(&close_full_list)).unwrap(); + tx.put::(shard(u64::MAX), list(&almost_full_list)).unwrap(); Ok(()) }) .unwrap(); // run - run(&db, 5); + run(&db, LAST_BLOCK_IN_FULL_SHARD, Some(LAST_BLOCK_IN_FULL_SHARD - 2)); // verify - close_full_list.push(4); - close_full_list.push(5); - let table = cast(db.table::().unwrap()); - assert_eq!(table, BTreeMap::from([(shard(u64::MAX), close_full_list.clone()),])); + almost_full_list.push(LAST_BLOCK_IN_FULL_SHARD - 1); + almost_full_list.push(LAST_BLOCK_IN_FULL_SHARD); + let table = cast(db.table::().unwrap()); + assert_eq!(table, BTreeMap::from([(shard(u64::MAX), almost_full_list.clone())])); // unwind - unwind(&db, 5, 0); + unwind(&db, LAST_BLOCK_IN_FULL_SHARD, LAST_BLOCK_IN_FULL_SHARD - 2); // verify initial state - close_full_list.pop(); - close_full_list.pop(); - let table = cast(db.table::().unwrap()); - assert_eq!(table, BTreeMap::from([(shard(u64::MAX), close_full_list),])); + almost_full_list.pop(); + almost_full_list.pop(); + let table = cast(db.table::().unwrap()); + assert_eq!(table, BTreeMap::from([(shard(u64::MAX), almost_full_list)])); // verify initial state } @@ -320,76 +330,83 @@ mod tests { async fn insert_index_second_half_shard() { // init let db = TestStageDB::default(); - let mut close_full_list = vec![1; NUM_OF_INDICES_IN_SHARD - 1]; + let mut almost_full_list = (1..=LAST_BLOCK_IN_FULL_SHARD - 1).collect::>(); // setup partial_setup(&db); db.commit(|tx| { - tx.put::(shard(u64::MAX), list(&close_full_list)).unwrap(); + tx.put::(shard(u64::MAX), list(&almost_full_list)).unwrap(); Ok(()) }) .unwrap(); // run - run(&db, 5); + run(&db, LAST_BLOCK_IN_FULL_SHARD + 1, Some(LAST_BLOCK_IN_FULL_SHARD - 1)); // verify - close_full_list.push(4); - let table = cast(db.table::().unwrap()); + almost_full_list.push(LAST_BLOCK_IN_FULL_SHARD); + let table = cast(db.table::().unwrap()); assert_eq!( table, - BTreeMap::from([(shard(4), close_full_list.clone()), (shard(u64::MAX), vec![5])]) + BTreeMap::from([ + (shard(LAST_BLOCK_IN_FULL_SHARD), almost_full_list.clone()), + (shard(u64::MAX), vec![LAST_BLOCK_IN_FULL_SHARD + 1]) + ]) ); // unwind - unwind(&db, 5, 0); + unwind(&db, LAST_BLOCK_IN_FULL_SHARD, LAST_BLOCK_IN_FULL_SHARD - 1); // verify initial state - close_full_list.pop(); - let table = cast(db.table::().unwrap()); - assert_eq!(table, BTreeMap::from([(shard(u64::MAX), close_full_list),])); + almost_full_list.pop(); + let table = cast(db.table::().unwrap()); + assert_eq!(table, BTreeMap::from([(shard(u64::MAX), almost_full_list)])); } #[tokio::test] async fn insert_index_to_third_shard() { // init let db = TestStageDB::default(); - let full_list = 
vec![1; NUM_OF_INDICES_IN_SHARD]; + let full_list = (1..=LAST_BLOCK_IN_FULL_SHARD).collect::>(); // setup partial_setup(&db); db.commit(|tx| { - tx.put::(shard(1), list(&full_list)).unwrap(); - tx.put::(shard(2), list(&full_list)).unwrap(); - tx.put::(shard(u64::MAX), list(&[2, 3])).unwrap(); + tx.put::(shard(1), list(&full_list)).unwrap(); + tx.put::(shard(2), list(&full_list)).unwrap(); + tx.put::( + shard(u64::MAX), + list(&[LAST_BLOCK_IN_FULL_SHARD + 1]), + ) + .unwrap(); Ok(()) }) .unwrap(); - run(&db, 5); + run(&db, LAST_BLOCK_IN_FULL_SHARD + 2, Some(LAST_BLOCK_IN_FULL_SHARD + 1)); // verify - let table = cast(db.table::().unwrap()); + let table = cast(db.table::().unwrap()); assert_eq!( table, BTreeMap::from([ (shard(1), full_list.clone()), (shard(2), full_list.clone()), - (shard(u64::MAX), vec![2, 3, 4, 5]) + (shard(u64::MAX), vec![LAST_BLOCK_IN_FULL_SHARD + 1, LAST_BLOCK_IN_FULL_SHARD + 2]) ]) ); // unwind - unwind(&db, 5, 0); + unwind(&db, LAST_BLOCK_IN_FULL_SHARD + 2, LAST_BLOCK_IN_FULL_SHARD + 1); // verify initial state - let table = cast(db.table::().unwrap()); + let table = cast(db.table::().unwrap()); assert_eq!( table, BTreeMap::from([ (shard(1), full_list.clone()), (shard(2), full_list.clone()), - (shard(u64::MAX), vec![2, 3]) + (shard(u64::MAX), vec![LAST_BLOCK_IN_FULL_SHARD + 1]) ]) ); } @@ -415,9 +432,9 @@ mod tests { .unwrap(); // setup changeset that are going to be applied to history index - tx.put::(20, acc()).unwrap(); - tx.put::(36, acc()).unwrap(); - tx.put::(100, acc()).unwrap(); + tx.put::(20, acc()).unwrap(); + tx.put::(36, acc()).unwrap(); + tx.put::(100, acc()).unwrap(); Ok(()) }) .unwrap(); @@ -434,14 +451,14 @@ mod tests { provider.commit().unwrap(); // verify - let table = cast(db.table::().unwrap()); + let table = cast(db.table::().unwrap()); assert_eq!(table, BTreeMap::from([(shard(u64::MAX), vec![36, 100])])); // unwind unwind(&db, 20000, 0); // verify initial state - let table = db.table::().unwrap(); + let table = db.table::().unwrap(); assert!(table.is_empty()); } @@ -487,7 +504,7 @@ mod tests { let blocks = random_block_range(&mut rng, start..=end, B256::ZERO, 0..3); - let (transitions, _) = random_changeset_range( + let (changesets, _) = random_changeset_range( &mut rng, blocks.iter(), accounts.into_iter().map(|(addr, acc)| (addr, (acc, Vec::new()))), @@ -496,7 +513,7 @@ mod tests { ); // add block changeset from block 1. - self.db.insert_changesets(transitions, Some(start))?; + self.db.insert_changesets(changesets, Some(start))?; Ok(()) } @@ -520,7 +537,7 @@ mod tests { let provider = self.db.factory.provider()?; let mut changeset_cursor = - provider.tx_ref().cursor_read::()?; + provider.tx_ref().cursor_read::()?; let account_transitions = changeset_cursor.walk_range(start_block..=end_block)?.try_fold( @@ -541,8 +558,8 @@ mod tests { .iter() .chunks(sharded_key::NUM_OF_INDICES_IN_SHARD) .into_iter() - .map(|chunks| chunks.map(|i| *i as usize).collect::>()) - .collect::>(); + .map(|chunks| chunks.copied().collect::>()) + .collect::>>(); let last_chunk = chunks.pop(); chunks.into_iter().for_each(|list| { @@ -551,20 +568,17 @@ mod tests { address, *list.last().expect("Chuck does not return empty list") as BlockNumber, - ) as ShardedKey
, + ), list, ); }); if let Some(last_list) = last_chunk { - result.insert( - ShardedKey::new(address, u64::MAX) as ShardedKey
, - last_list, - ); + result.insert(ShardedKey::new(address, u64::MAX), last_list); }; } - let table = cast(self.db.table::().unwrap()); + let table = cast(self.db.table::().unwrap()); assert_eq!(table, result); } Ok(()) @@ -573,7 +587,7 @@ mod tests { impl UnwindStageTestRunner for IndexAccountHistoryTestRunner { fn validate_unwind(&self, _input: UnwindInput) -> Result<(), TestRunnerError> { - let table = self.db.table::().unwrap(); + let table = self.db.table::().unwrap(); assert!(table.is_empty()); Ok(()) } diff --git a/crates/stages/src/stages/index_storage_history.rs b/crates/stages/src/stages/index_storage_history.rs index 73eb53da4ca5..b6d79583db23 100644 --- a/crates/stages/src/stages/index_storage_history.rs +++ b/crates/stages/src/stages/index_storage_history.rs @@ -2,7 +2,7 @@ use crate::{ExecInput, ExecOutput, Stage, StageError, UnwindInput, UnwindOutput} use reth_db::{database::Database, models::BlockNumberAddress}; use reth_primitives::{ stage::{StageCheckpoint, StageId}, - PruneCheckpoint, PruneMode, PruneSegment, + PruneCheckpoint, PruneMode, PrunePurpose, PruneSegment, }; use reth_provider::{ DatabaseProviderRW, HistoryWriter, PruneCheckpointReader, PruneCheckpointWriter, StorageReader, @@ -11,7 +11,7 @@ use std::fmt::Debug; /// Stage is indexing history the account changesets generated in /// [`ExecutionStage`][crate::stages::ExecutionStage]. For more information -/// on index sharding take a look at [`reth_db::tables::StorageHistory`]. +/// on index sharding take a look at [`reth_db::tables::StoragesHistory`]. #[derive(Debug)] pub struct IndexStorageHistoryStage { /// Number of blocks after which the control @@ -48,7 +48,13 @@ impl Stage for IndexStorageHistoryStage { ) -> Result { if let Some((target_prunable_block, prune_mode)) = self .prune_mode - .map(|mode| mode.prune_target_block(input.target(), PruneSegment::StorageHistory)) + .map(|mode| { + mode.prune_target_block( + input.target(), + PruneSegment::StorageHistory, + PrunePurpose::User, + ) + }) .transpose()? .flatten() { @@ -121,19 +127,23 @@ mod tests { generators::{random_block_range, random_changeset_range, random_contract_account_range}, }; use reth_primitives::{address, b256, Address, BlockNumber, StorageEntry, B256, U256}; + use reth_provider::providers::StaticFileWriter; use std::collections::BTreeMap; const ADDRESS: Address = address!("0000000000000000000000000000000000000001"); const STORAGE_KEY: B256 = b256!("0000000000000000000000000000000000000000000000000000000000000001"); + const LAST_BLOCK_IN_FULL_SHARD: BlockNumber = NUM_OF_INDICES_IN_SHARD as BlockNumber; + const MAX_BLOCK: BlockNumber = NUM_OF_INDICES_IN_SHARD as BlockNumber + 2; + fn storage(key: B256) -> StorageEntry { // Value is not used in indexing stage. 
StorageEntry { key, value: U256::ZERO } } - fn trns(transition_id: u64) -> BlockNumberAddress { - BlockNumberAddress((transition_id, ADDRESS)) + fn block_number_address(block_number: u64) -> BlockNumberAddress { + BlockNumberAddress((block_number, ADDRESS)) } /// Shard for account @@ -144,17 +154,17 @@ mod tests { } } - fn list(list: &[usize]) -> BlockNumberList { + fn list(list: &[u64]) -> BlockNumberList { BlockNumberList::new(list).unwrap() } fn cast( table: Vec<(StorageShardedKey, BlockNumberList)>, - ) -> BTreeMap> { + ) -> BTreeMap> { table .into_iter() .map(|(k, v)| { - let v = v.iter(0).collect(); + let v = v.iter().collect(); (k, v) }) .collect() @@ -163,33 +173,32 @@ mod tests { fn partial_setup(db: &TestStageDB) { // setup db.commit(|tx| { - // we just need first and last - tx.put::( - 0, - StoredBlockBodyIndices { tx_count: 3, ..Default::default() }, - ) - .unwrap(); - - tx.put::( - 5, - StoredBlockBodyIndices { tx_count: 5, ..Default::default() }, - ) - .unwrap(); - - // setup changeset that are going to be applied to history index - tx.put::(trns(4), storage(STORAGE_KEY)).unwrap(); - tx.put::(trns(5), storage(STORAGE_KEY)).unwrap(); + for block in 0..=MAX_BLOCK { + tx.put::( + block, + StoredBlockBodyIndices { tx_count: 3, ..Default::default() }, + )?; + // setup changeset that is going to be applied to history index + tx.put::( + block_number_address(block), + storage(STORAGE_KEY), + )?; + } Ok(()) }) .unwrap() } - fn run(db: &TestStageDB, run_to: u64) { - let input = ExecInput { target: Some(run_to), ..Default::default() }; + fn run(db: &TestStageDB, run_to: u64, input_checkpoint: Option) { + let input = ExecInput { + target: Some(run_to), + checkpoint: input_checkpoint + .map(|block_number| StageCheckpoint { block_number, stage_checkpoint: None }), + }; let mut stage = IndexStorageHistoryStage::default(); let provider = db.factory.provider_rw().unwrap(); let out = stage.execute(&provider, input).unwrap(); - assert_eq!(out, ExecOutput { checkpoint: StageCheckpoint::new(5), done: true }); + assert_eq!(out, ExecOutput { checkpoint: StageCheckpoint::new(run_to), done: true }); provider.commit().unwrap(); } @@ -215,17 +224,17 @@ mod tests { partial_setup(&db); // run - run(&db, 5); + run(&db, 3, None); // verify - let table = cast(db.table::().unwrap()); - assert_eq!(table, BTreeMap::from([(shard(u64::MAX), vec![4, 5]),])); + let table = cast(db.table::().unwrap()); + assert_eq!(table, BTreeMap::from([(shard(u64::MAX), vec![1, 2, 3])])); // unwind unwind(&db, 5, 0); // verify initial state - let table = db.table::().unwrap(); + let table = db.table::().unwrap(); assert!(table.is_empty()); } @@ -237,58 +246,59 @@ mod tests { // setup partial_setup(&db); db.commit(|tx| { - tx.put::(shard(u64::MAX), list(&[1, 2, 3])).unwrap(); + tx.put::(shard(u64::MAX), list(&[1, 2, 3])).unwrap(); Ok(()) }) .unwrap(); // run - run(&db, 5); + run(&db, 5, Some(3)); // verify - let table = cast(db.table::().unwrap()); - assert_eq!(table, BTreeMap::from([(shard(u64::MAX), vec![1, 2, 3, 4, 5]),])); + let table = cast(db.table::().unwrap()); + assert_eq!(table, BTreeMap::from([(shard(u64::MAX), vec![1, 2, 3, 4, 5])])); // unwind - unwind(&db, 5, 0); + unwind(&db, 5, 3); // verify initial state - let table = cast(db.table::().unwrap()); - assert_eq!(table, BTreeMap::from([(shard(u64::MAX), vec![1, 2, 3]),])); + let table = cast(db.table::().unwrap()); + assert_eq!(table, BTreeMap::from([(shard(u64::MAX), vec![1, 2, 3])])); } #[tokio::test] async fn insert_index_to_full_shard() { // init let db = 
TestStageDB::default(); - let _input = ExecInput { target: Some(5), ..Default::default() }; - // change does not matter only that account is present in changeset. - let full_list = vec![3; NUM_OF_INDICES_IN_SHARD]; + let full_list = (1..=LAST_BLOCK_IN_FULL_SHARD).collect::>(); // setup partial_setup(&db); db.commit(|tx| { - tx.put::(shard(u64::MAX), list(&full_list)).unwrap(); + tx.put::(shard(u64::MAX), list(&full_list)).unwrap(); Ok(()) }) .unwrap(); // run - run(&db, 5); + run(&db, LAST_BLOCK_IN_FULL_SHARD + 2, Some(LAST_BLOCK_IN_FULL_SHARD)); // verify - let table = cast(db.table::().unwrap()); + let table = cast(db.table::().unwrap()); assert_eq!( table, - BTreeMap::from([(shard(3), full_list.clone()), (shard(u64::MAX), vec![4, 5])]) + BTreeMap::from([ + (shard(LAST_BLOCK_IN_FULL_SHARD), full_list.clone()), + (shard(u64::MAX), vec![LAST_BLOCK_IN_FULL_SHARD + 1, LAST_BLOCK_IN_FULL_SHARD + 2]) + ]) ); // unwind - unwind(&db, 5, 0); + unwind(&db, LAST_BLOCK_IN_FULL_SHARD + 2, LAST_BLOCK_IN_FULL_SHARD); // verify initial state - let table = cast(db.table::().unwrap()); + let table = cast(db.table::().unwrap()); assert_eq!(table, BTreeMap::from([(shard(u64::MAX), full_list)])); } @@ -296,33 +306,33 @@ mod tests { async fn insert_index_to_fill_shard() { // init let db = TestStageDB::default(); - let mut close_full_list = vec![1; NUM_OF_INDICES_IN_SHARD - 2]; + let mut almost_full_list = (1..=LAST_BLOCK_IN_FULL_SHARD - 2).collect::>(); // setup partial_setup(&db); db.commit(|tx| { - tx.put::(shard(u64::MAX), list(&close_full_list)).unwrap(); + tx.put::(shard(u64::MAX), list(&almost_full_list)).unwrap(); Ok(()) }) .unwrap(); // run - run(&db, 5); + run(&db, LAST_BLOCK_IN_FULL_SHARD, Some(LAST_BLOCK_IN_FULL_SHARD - 2)); // verify - close_full_list.push(4); - close_full_list.push(5); - let table = cast(db.table::().unwrap()); - assert_eq!(table, BTreeMap::from([(shard(u64::MAX), close_full_list.clone()),])); + almost_full_list.push(LAST_BLOCK_IN_FULL_SHARD - 1); + almost_full_list.push(LAST_BLOCK_IN_FULL_SHARD); + let table = cast(db.table::().unwrap()); + assert_eq!(table, BTreeMap::from([(shard(u64::MAX), almost_full_list.clone())])); // unwind - unwind(&db, 5, 0); + unwind(&db, LAST_BLOCK_IN_FULL_SHARD, LAST_BLOCK_IN_FULL_SHARD - 2); // verify initial state - close_full_list.pop(); - close_full_list.pop(); - let table = cast(db.table::().unwrap()); - assert_eq!(table, BTreeMap::from([(shard(u64::MAX), close_full_list),])); + almost_full_list.pop(); + almost_full_list.pop(); + let table = cast(db.table::().unwrap()); + assert_eq!(table, BTreeMap::from([(shard(u64::MAX), almost_full_list)])); // verify initial state } @@ -331,76 +341,83 @@ mod tests { async fn insert_index_second_half_shard() { // init let db = TestStageDB::default(); - let mut close_full_list = vec![1; NUM_OF_INDICES_IN_SHARD - 1]; + let mut close_full_list = (1..=LAST_BLOCK_IN_FULL_SHARD - 1).collect::>(); // setup partial_setup(&db); db.commit(|tx| { - tx.put::(shard(u64::MAX), list(&close_full_list)).unwrap(); + tx.put::(shard(u64::MAX), list(&close_full_list)).unwrap(); Ok(()) }) .unwrap(); // run - run(&db, 5); + run(&db, LAST_BLOCK_IN_FULL_SHARD + 1, Some(LAST_BLOCK_IN_FULL_SHARD - 1)); // verify - close_full_list.push(4); - let table = cast(db.table::().unwrap()); + close_full_list.push(LAST_BLOCK_IN_FULL_SHARD); + let table = cast(db.table::().unwrap()); assert_eq!( table, - BTreeMap::from([(shard(4), close_full_list.clone()), (shard(u64::MAX), vec![5])]) + BTreeMap::from([ + (shard(LAST_BLOCK_IN_FULL_SHARD), 
close_full_list.clone()), + (shard(u64::MAX), vec![LAST_BLOCK_IN_FULL_SHARD + 1]) + ]) ); // unwind - unwind(&db, 5, 0); + unwind(&db, LAST_BLOCK_IN_FULL_SHARD, LAST_BLOCK_IN_FULL_SHARD - 1); // verify initial state close_full_list.pop(); - let table = cast(db.table::().unwrap()); - assert_eq!(table, BTreeMap::from([(shard(u64::MAX), close_full_list),])); + let table = cast(db.table::().unwrap()); + assert_eq!(table, BTreeMap::from([(shard(u64::MAX), close_full_list)])); } #[tokio::test] async fn insert_index_to_third_shard() { // init let db = TestStageDB::default(); - let full_list = vec![1; NUM_OF_INDICES_IN_SHARD]; + let full_list = (1..=LAST_BLOCK_IN_FULL_SHARD).collect::>(); // setup partial_setup(&db); db.commit(|tx| { - tx.put::(shard(1), list(&full_list)).unwrap(); - tx.put::(shard(2), list(&full_list)).unwrap(); - tx.put::(shard(u64::MAX), list(&[2, 3])).unwrap(); + tx.put::(shard(1), list(&full_list)).unwrap(); + tx.put::(shard(2), list(&full_list)).unwrap(); + tx.put::( + shard(u64::MAX), + list(&[LAST_BLOCK_IN_FULL_SHARD + 1]), + ) + .unwrap(); Ok(()) }) .unwrap(); - run(&db, 5); + run(&db, LAST_BLOCK_IN_FULL_SHARD + 2, Some(LAST_BLOCK_IN_FULL_SHARD + 1)); // verify - let table = cast(db.table::().unwrap()); + let table = cast(db.table::().unwrap()); assert_eq!( table, BTreeMap::from([ (shard(1), full_list.clone()), (shard(2), full_list.clone()), - (shard(u64::MAX), vec![2, 3, 4, 5]) + (shard(u64::MAX), vec![LAST_BLOCK_IN_FULL_SHARD + 1, LAST_BLOCK_IN_FULL_SHARD + 2]) ]) ); // unwind - unwind(&db, 5, 0); + unwind(&db, LAST_BLOCK_IN_FULL_SHARD + 2, LAST_BLOCK_IN_FULL_SHARD + 1); // verify initial state - let table = cast(db.table::().unwrap()); + let table = cast(db.table::().unwrap()); assert_eq!( table, BTreeMap::from([ (shard(1), full_list.clone()), (shard(2), full_list.clone()), - (shard(u64::MAX), vec![2, 3]) + (shard(u64::MAX), vec![LAST_BLOCK_IN_FULL_SHARD + 1]) ]) ); } @@ -426,9 +443,12 @@ mod tests { .unwrap(); // setup changeset that are going to be applied to history index - tx.put::(trns(20), storage(STORAGE_KEY)).unwrap(); - tx.put::(trns(36), storage(STORAGE_KEY)).unwrap(); - tx.put::(trns(100), storage(STORAGE_KEY)).unwrap(); + tx.put::(block_number_address(20), storage(STORAGE_KEY)) + .unwrap(); + tx.put::(block_number_address(36), storage(STORAGE_KEY)) + .unwrap(); + tx.put::(block_number_address(100), storage(STORAGE_KEY)) + .unwrap(); Ok(()) }) .unwrap(); @@ -445,14 +465,14 @@ mod tests { provider.commit().unwrap(); // verify - let table = cast(db.table::().unwrap()); - assert_eq!(table, BTreeMap::from([(shard(u64::MAX), vec![36, 100]),])); + let table = cast(db.table::().unwrap()); + assert_eq!(table, BTreeMap::from([(shard(u64::MAX), vec![36, 100])])); // unwind unwind(&db, 20000, 0); // verify initial state - let table = db.table::().unwrap(); + let table = db.table::().unwrap(); assert!(table.is_empty()); } @@ -498,16 +518,16 @@ mod tests { let blocks = random_block_range(&mut rng, start..=end, B256::ZERO, 0..3); - let (transitions, _) = random_changeset_range( + let (changesets, _) = random_changeset_range( &mut rng, blocks.iter(), accounts.into_iter().map(|(addr, acc)| (addr, (acc, Vec::new()))), 0..3, - 0..256, + 0..u64::MAX, ); // add block changeset from block 1. 
- self.db.insert_changesets(transitions, Some(start))?; + self.db.insert_changesets(changesets, Some(start))?; Ok(()) } @@ -531,7 +551,7 @@ mod tests { let provider = self.db.factory.provider()?; let mut changeset_cursor = - provider.tx_ref().cursor_read::()?; + provider.tx_ref().cursor_read::()?; let storage_transitions = changeset_cursor .walk_range(BlockNumberAddress::range(start_block..=end_block))? @@ -556,8 +576,8 @@ mod tests { .iter() .chunks(sharded_key::NUM_OF_INDICES_IN_SHARD) .into_iter() - .map(|chunks| chunks.map(|i| *i as usize).collect::>()) - .collect::>(); + .map(|chunks| chunks.copied().collect::>()) + .collect::>>(); let last_chunk = chunks.pop(); chunks.into_iter().for_each(|list| { @@ -580,7 +600,7 @@ mod tests { }; } - let table = cast(self.db.table::().unwrap()); + let table = cast(self.db.table::().unwrap()); assert_eq!(table, result); } Ok(()) @@ -589,7 +609,7 @@ mod tests { impl UnwindStageTestRunner for IndexStorageHistoryTestRunner { fn validate_unwind(&self, _input: UnwindInput) -> Result<(), TestRunnerError> { - let table = self.db.table::().unwrap(); + let table = self.db.table::().unwrap(); assert!(table.is_empty()); Ok(()) } diff --git a/crates/stages/src/stages/merkle.rs b/crates/stages/src/stages/merkle.rs index b56b6ea7e0c8..b2761267b8d9 100644 --- a/crates/stages/src/stages/merkle.rs +++ b/crates/stages/src/stages/merkle.rs @@ -7,13 +7,13 @@ use reth_db::{ }; use reth_interfaces::consensus; use reth_primitives::{ - hex, stage::{EntitiesCheckpoint, MerkleCheckpoint, StageCheckpoint, StageId}, trie::StoredSubNode, BlockNumber, GotExpected, SealedHeader, B256, }; use reth_provider::{ - DatabaseProviderRW, HeaderProvider, ProviderError, StageCheckpointReader, StageCheckpointWriter, + DatabaseProviderRW, HeaderProvider, ProviderError, StageCheckpointReader, + StageCheckpointWriter, StatsReader, }; use reth_trie::{IntermediateStateRootState, StateRoot, StateRootProgress}; use std::fmt::Debug; @@ -106,7 +106,6 @@ impl MerkleStage { debug!( target: "sync::stages::merkle::exec", last_account_key = ?checkpoint.last_account_key, - last_walker_key = ?hex::encode(&checkpoint.last_walker_key), "Saving inner merkle checkpoint" ); checkpoint.to_compact(&mut buf); @@ -164,7 +163,6 @@ impl Stage for MerkleStage { current = ?current_block_number, target = ?to_block, last_account_key = ?checkpoint.last_account_key, - last_walker_key = ?hex::encode(&checkpoint.last_walker_key), "Continuing inner merkle checkpoint" ); @@ -187,8 +185,8 @@ impl Stage for MerkleStage { } .unwrap_or(EntitiesCheckpoint { processed: 0, - total: (provider.tx_ref().entries::()? + - provider.tx_ref().entries::()?) + total: (provider.count_entries::()? + + provider.count_entries::()?) as u64, }); @@ -233,8 +231,8 @@ impl Stage for MerkleStage { .map_err(|e| StageError::Fatal(Box::new(e)))?; updates.flush(provider.tx_ref())?; - let total_hashed_entries = (provider.tx_ref().entries::()? + - provider.tx_ref().entries::()?) + let total_hashed_entries = (provider.count_entries::()? + + provider.count_entries::()?) as u64; let entities_checkpoint = EntitiesCheckpoint { @@ -276,8 +274,8 @@ impl Stage for MerkleStage { let mut entities_checkpoint = input.checkpoint.entities_stage_checkpoint().unwrap_or(EntitiesCheckpoint { processed: 0, - total: (tx.entries::()? + - tx.entries::()?) as u64, + total: (tx.entries::()? + + tx.entries::()?) 
as u64, }); if input.unwind_to == 0 { @@ -339,8 +337,8 @@ fn validate_state_root( mod tests { use super::*; use crate::test_utils::{ - stage_test_suite_ext, ExecuteStageTestRunner, StageTestRunner, TestRunnerError, - TestStageDB, UnwindStageTestRunner, + stage_test_suite_ext, ExecuteStageTestRunner, StageTestRunner, StorageKind, + TestRunnerError, TestStageDB, UnwindStageTestRunner, }; use assert_matches::assert_matches; use reth_db::cursor::{DbCursorRO, DbCursorRW, DbDupCursorRO}; @@ -350,7 +348,10 @@ mod tests { random_block, random_block_range, random_changeset_range, random_contract_account_range, }, }; - use reth_primitives::{keccak256, stage::StageUnitCheckpoint, SealedBlock, StorageEntry, U256}; + use reth_primitives::{ + keccak256, stage::StageUnitCheckpoint, SealedBlock, StaticFileSegment, StorageEntry, U256, + }; + use reth_provider::providers::StaticFileWriter; use reth_trie::test_utils::{state_root, state_root_prehashed}; use std::collections::BTreeMap; @@ -388,8 +389,8 @@ mod tests { done: true }) if block_number == previous_stage && processed == total && total == ( - runner.db.table::().unwrap().len() + - runner.db.table::().unwrap().len() + runner.db.table::().unwrap().len() + + runner.db.table::().unwrap().len() ) as u64 ); @@ -428,8 +429,8 @@ mod tests { done: true }) if block_number == previous_stage && processed == total && total == ( - runner.db.table::().unwrap().len() + - runner.db.table::().unwrap().len() + runner.db.table::().unwrap().len() + + runner.db.table::().unwrap().len() ) as u64 ); @@ -469,6 +470,17 @@ mod tests { let end = input.target(); let mut rng = generators::rng(); + let mut preblocks = vec![]; + if stage_progress > 0 { + preblocks.append(&mut random_block_range( + &mut rng, + 0..=stage_progress - 1, + B256::ZERO, + 0..1, + )); + self.db.insert_blocks(preblocks.iter(), StorageKind::Static)?; + } + let num_of_accounts = 31; let accounts = random_contract_account_range(&mut rng, &mut (0..num_of_accounts)) .into_iter() @@ -478,8 +490,13 @@ mod tests { accounts.iter().map(|(addr, acc)| (*addr, (*acc, std::iter::empty()))), )?; - let SealedBlock { header, body, ommers, withdrawals } = - random_block(&mut rng, stage_progress, None, Some(0), None); + let SealedBlock { header, body, ommers, withdrawals } = random_block( + &mut rng, + stage_progress, + preblocks.last().map(|b| b.hash()), + Some(0), + None, + ); let mut header = header.unseal(); header.state_root = state_root( @@ -493,7 +510,8 @@ mod tests { let head_hash = sealed_head.hash(); let mut blocks = vec![sealed_head]; blocks.extend(random_block_range(&mut rng, start..=end, head_hash, 0..3)); - self.db.insert_blocks(blocks.iter(), None)?; + let last_block = blocks.last().cloned().unwrap(); + self.db.insert_blocks(blocks.iter(), StorageKind::Static)?; let (transitions, final_state) = random_changeset_range( &mut rng, @@ -509,8 +527,8 @@ mod tests { // Calculate state root let root = self.db.query(|tx| { let mut accounts = BTreeMap::default(); - let mut accounts_cursor = tx.cursor_read::()?; - let mut storage_cursor = tx.cursor_dup_read::()?; + let mut accounts_cursor = tx.cursor_read::()?; + let mut storage_cursor = tx.cursor_dup_read::()?; for entry in accounts_cursor.walk_range(..)? 
{ let (key, account) = entry?; let mut storage_entries = Vec::new(); @@ -530,13 +548,16 @@ mod tests { Ok(state_root_prehashed(accounts.into_iter())) })?; - let last_block_number = end; - self.db.commit(|tx| { - let mut last_header = tx.get::(last_block_number)?.unwrap(); - last_header.state_root = root; - tx.put::(last_block_number, last_header)?; - Ok(()) - })?; + let static_file_provider = self.db.factory.static_file_provider(); + let mut writer = + static_file_provider.latest_writer(StaticFileSegment::Headers).unwrap(); + let mut last_header = last_block.header().clone(); + last_header.state_root = root; + + let hash = last_header.hash_slow(); + writer.prune_headers(1).unwrap(); + writer.append_header(last_header, U256::ZERO, hash).unwrap(); + writer.commit().unwrap(); Ok(blocks) } @@ -563,9 +584,9 @@ mod tests { self.db .commit(|tx| { let mut storage_changesets_cursor = - tx.cursor_dup_read::().unwrap(); + tx.cursor_dup_read::().unwrap(); let mut storage_cursor = - tx.cursor_dup_write::().unwrap(); + tx.cursor_dup_write::().unwrap(); let mut tree: BTreeMap> = BTreeMap::new(); @@ -599,7 +620,7 @@ mod tests { } let mut changeset_cursor = - tx.cursor_dup_write::().unwrap(); + tx.cursor_dup_write::().unwrap(); let mut rev_changeset_walker = changeset_cursor.walk_back(None).unwrap(); while let Some((block_number, account_before_tx)) = @@ -610,13 +631,13 @@ mod tests { } if let Some(acc) = account_before_tx.info { - tx.put::( + tx.put::( keccak256(account_before_tx.address), acc, ) .unwrap(); } else { - tx.delete::( + tx.delete::( keccak256(account_before_tx.address), None, ) diff --git a/crates/stages/src/stages/mod.rs b/crates/stages/src/stages/mod.rs index 6e63083d753a..0f260d90a73f 100644 --- a/crates/stages/src/stages/mod.rs +++ b/crates/stages/src/stages/mod.rs @@ -18,8 +18,6 @@ mod index_storage_history; mod merkle; /// The sender recovery stage. 
mod sender_recovery; -/// The total difficulty stage -mod total_difficulty; /// The transaction lookup stage mod tx_lookup; @@ -33,7 +31,6 @@ pub use index_account_history::*; pub use index_storage_history::*; pub use merkle::*; pub use sender_recovery::*; -pub use total_difficulty::*; pub use tx_lookup::*; #[cfg(test)] @@ -47,16 +44,17 @@ mod tests { tables, test_utils::TempDatabase, transaction::{DbTx, DbTxMut}, - AccountHistory, DatabaseEnv, + AccountsHistory, DatabaseEnv, }; use reth_interfaces::test_utils::generators::{self, random_block}; use reth_node_ethereum::EthEvmConfig; use reth_primitives::{ address, hex_literal::hex, keccak256, Account, Bytecode, ChainSpecBuilder, PruneMode, - PruneModes, SealedBlock, U256, + PruneModes, SealedBlock, StaticFileSegment, U256, }; use reth_provider::{ - AccountExtReader, BlockWriter, ProviderFactory, ReceiptProvider, StorageReader, + providers::StaticFileWriter, AccountExtReader, ProviderFactory, ReceiptProvider, + StorageReader, }; use reth_revm::EvmProcessorFactory; use std::sync::Arc; @@ -73,8 +71,12 @@ mod tests { let genesis = SealedBlock::decode(&mut genesis_rlp).unwrap(); let mut block_rlp = hex!("f90262f901f9a075c371ba45999d87f4542326910a11af515897aebce5265d3f6acd1f1161f82fa01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa098f2dcd87c8ae4083e7017a05456c14eea4b1db2032126e27b3b1563d57d7cc0a08151d548273f6683169524b66ca9fe338b9ce42bc3540046c828fd939ae23bcba03f4e5c2ec5b2170b711d97ee755c160457bb58d8daa338e835ec02ae6860bbabb901000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000083020000018502540be40082a8798203e800a00000000000000000000000000000000000000000000000000000000000000000880000000000000000f863f861800a8405f5e10094100000000000000000000000000000000000000080801ba07e09e26678ed4fac08a249ebe8ed680bf9051a5e14ad223e4b2b9d26e0208f37a05f6e3f188e3e6eab7d7d3b6568f5eac7d687b08d307d3154ccd8c87b4630509bc0").as_slice(); let block = SealedBlock::decode(&mut block_rlp).unwrap(); - provider_rw.insert_block(genesis.try_seal_with_senders().unwrap(), None).unwrap(); - provider_rw.insert_block(block.clone().try_seal_with_senders().unwrap(), None).unwrap(); + provider_rw + .insert_historical_block(genesis.try_seal_with_senders().unwrap(), None) + .unwrap(); + provider_rw + .insert_historical_block(block.clone().try_seal_with_senders().unwrap(), None) + .unwrap(); // Fill with bogus blocks to respect PruneMode distance. 
let mut head = block.hash(); @@ -82,8 +84,16 @@ mod tests { for block_number in 2..=tip { let nblock = random_block(&mut rng, block_number, Some(head), Some(0), Some(0)); head = nblock.hash(); - provider_rw.insert_block(nblock.try_seal_with_senders().unwrap(), None).unwrap(); + provider_rw + .insert_historical_block(nblock.try_seal_with_senders().unwrap(), None) + .unwrap(); } + provider_rw + .static_file_provider() + .latest_writer(StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); provider_rw.commit().unwrap(); // insert pre state @@ -165,8 +175,8 @@ mod tests { assert!(acc_indexing_stage.execute(&provider, input).is_err()); } else { acc_indexing_stage.execute(&provider, input).unwrap(); - let mut account_history: Cursor = - provider.tx_ref().cursor_read::().unwrap(); + let mut account_history: Cursor = + provider.tx_ref().cursor_read::().unwrap(); assert_eq!(account_history.walk(None).unwrap().count(), expect_num_acc_changesets); } @@ -183,7 +193,7 @@ mod tests { storage_indexing_stage.execute(&provider, input).unwrap(); let mut storage_history = - provider.tx_ref().cursor_read::().unwrap(); + provider.tx_ref().cursor_read::().unwrap(); assert_eq!( storage_history.walk(None).unwrap().count(), expect_num_storage_changesets diff --git a/crates/stages/src/stages/sender_recovery.rs b/crates/stages/src/stages/sender_recovery.rs index a758b9b6bc09..79e4263661de 100644 --- a/crates/stages/src/stages/sender_recovery.rs +++ b/crates/stages/src/stages/sender_recovery.rs @@ -1,28 +1,28 @@ use crate::{BlockErrorKind, ExecInput, ExecOutput, Stage, StageError, UnwindInput, UnwindOutput}; -use itertools::Itertools; use reth_db::{ - cursor::{DbCursorRO, DbCursorRW}, + cursor::DbCursorRW, database::Database, + static_file::TransactionMask, tables, transaction::{DbTx, DbTxMut}, - DatabaseError, RawKey, RawTable, RawValue, }; use reth_interfaces::consensus; use reth_primitives::{ keccak256, stage::{EntitiesCheckpoint, StageCheckpoint, StageId}, - Address, PruneSegment, TransactionSignedNoHash, TxNumber, + Address, PruneSegment, StaticFileSegment, TransactionSignedNoHash, TxNumber, }; use reth_provider::{ BlockReader, DatabaseProviderRW, HeaderProvider, ProviderError, PruneCheckpointReader, + StatsReader, }; -use std::{fmt::Debug, sync::mpsc}; +use std::{fmt::Debug, ops::Range, sync::mpsc}; use thiserror::Error; use tracing::*; /// The sender recovery stage iterates over existing transactions, /// recovers the transaction signer and stores them -/// in [`TxSenders`][reth_db::tables::TxSenders] table. +/// in [`TransactionSenders`][reth_db::tables::TransactionSenders] table. #[derive(Clone, Debug)] pub struct SenderRecoveryStage { /// The size of inserted items after which the control @@ -51,9 +51,8 @@ impl Stage for SenderRecoveryStage { /// Retrieve the range of transactions to iterate over by querying /// [`BlockBodyIndices`][reth_db::tables::BlockBodyIndices], - /// collect transactions within that range, - /// recover signer for each transaction and store entries in - /// the [`TxSenders`][reth_db::tables::TxSenders] table. + /// collect transactions within that range, recover signer for each transaction and store + /// entries in the [`TransactionSenders`][reth_db::tables::TransactionSenders] table. 
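[editor's illustration] The hunk below replaces the old itertools-chunked table walk with fixed 100-transaction ranges read from static files and recovered on the rayon pool, with results returned over per-chunk channels. The following is a minimal, std-plus-rayon sketch of that chunking pattern only; `Tx`, `recover`, and `recover_in_chunks` are hypothetical stand-ins, not reth types or APIs, and the real stage fetches transactions through the static-file provider rather than taking a `Vec`.

```rust
use std::sync::mpsc;

type TxNumber = u64;

#[derive(Clone)]
struct Tx(Vec<u8>);

fn recover(tx: &Tx) -> [u8; 20] {
    // Placeholder for the expensive per-transaction work (signer recovery in the stage).
    let mut out = [0u8; 20];
    for (i, b) in tx.0.iter().enumerate() {
        out[i % 20] ^= *b;
    }
    out
}

fn recover_in_chunks(txs: Vec<(TxNumber, Tx)>) -> Vec<(TxNumber, [u8; 20])> {
    // Small, fixed-size chunks keep every rayon worker busy even though
    // individual transactions take very different amounts of time to process.
    const CHUNK_SIZE: usize = 100;

    let expected = txs.len();
    let mut receivers = Vec::new();
    for chunk in txs.chunks(CHUNK_SIZE) {
        let chunk = chunk.to_vec();
        // One channel per chunk; draining the receivers in submission order keeps
        // the final vector ordered by transaction number.
        let (result_tx, result_rx) = mpsc::channel();
        receivers.push(result_rx);
        rayon::spawn(move || {
            for (number, tx) in chunk {
                // A send error only means the caller already gave up; ignore it.
                let _ = result_tx.send((number, recover(&tx)));
            }
        });
    }

    let mut senders = Vec::with_capacity(expected);
    for rx in receivers {
        while let Ok(result) = rx.recv() {
            senders.push(result);
        }
    }
    senders
}

fn main() {
    let txs: Vec<(TxNumber, Tx)> = (0..250).map(|n| (n, Tx(vec![n as u8; 32]))).collect();
    let senders = recover_in_chunks(txs);
    assert_eq!(senders.len(), 250);
}
```

As the hunk's own comment explains, the previous approach divided `commit_threshold` evenly across the thread pool, which could leave workers idle behind a few slow chunks; a small fixed chunk size trades a little scheduling overhead for better load balancing.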
fn execute( &mut self, provider: &DatabaseProviderRW, @@ -80,48 +79,51 @@ impl Stage for SenderRecoveryStage { let tx = provider.tx_ref(); // Acquire the cursor for inserting elements - let mut senders_cursor = tx.cursor_write::()?; - - // Acquire the cursor over the transactions - let mut tx_cursor = tx.cursor_read::>()?; - // Walk the transactions from start to end index (inclusive) - let raw_tx_range = RawKey::new(tx_range.start)..RawKey::new(tx_range.end); - let tx_walker = tx_cursor.walk_range(raw_tx_range)?; + let mut senders_cursor = tx.cursor_write::()?; // Iterate over transactions in chunks info!(target: "sync::stages::sender_recovery", ?tx_range, "Recovering senders"); - // channels used to return result of sender recovery. - let mut channels = Vec::new(); - // Spawn recovery jobs onto the default rayon threadpool and send the result through the // channel. // - // We try to evenly divide the transactions to recover across all threads in the threadpool. - // Chunks are submitted instead of individual transactions to reduce the overhead of work - // stealing in the threadpool workers. - let chunk_size = self.commit_threshold as usize / rayon::current_num_threads(); - // prevents an edge case - // where the chunk size is either 0 or too small - // to gain anything from using more than 1 thread - let chunk_size = chunk_size.max(16); - - for chunk in &tx_walker.chunks(chunk_size) { + // Transactions are different size, so chunks will not all take the same processing time. If + // chunks are too big, there will be idle threads waiting for work. Choosing an + // arbitrary smaller value to make sure it doesn't happen. + let chunk_size = 100; + + let chunks = (tx_range.start..tx_range.end) + .step_by(chunk_size as usize) + .map(|start| start..std::cmp::min(start + chunk_size as u64, tx_range.end)) + .collect::>>(); + + let mut channels = Vec::with_capacity(chunks.len()); + for chunk_range in chunks { // An _unordered_ channel to receive results from a rayon job let (recovered_senders_tx, recovered_senders_rx) = mpsc::channel(); channels.push(recovered_senders_rx); - // Note: Unfortunate side-effect of how chunk is designed in itertools (it is not Send) - let chunk: Vec<_> = chunk.collect(); - // Spawn the sender recovery task onto the global rayon pool - // This task will send the results through the channel after it recovered the senders. + let static_file_provider = provider.static_file_provider().clone(); + + // Spawn the task onto the global rayon pool + // This task will send the results through the channel after it has read the transaction + // and calculated the sender. rayon::spawn(move || { let mut rlp_buf = Vec::with_capacity(128); - for entry in chunk { - rlp_buf.clear(); - let recovery_result = recover_sender(entry, &mut rlp_buf); - let _ = recovered_senders_tx.send(recovery_result); - } + let _ = static_file_provider.fetch_range_with_predicate( + StaticFileSegment::Transactions, + chunk_range, + |cursor, number| { + Ok(cursor + .get_one::>(number.into())? + .map(|tx| { + rlp_buf.clear(); + let _ = recovered_senders_tx + .send(recover_sender((number, tx), &mut rlp_buf)); + })) + }, + |_| true, + ); }); } @@ -135,7 +137,7 @@ impl Stage for SenderRecoveryStage { SenderRecoveryStageError::FailedRecovery(err) => { // get the block number for the bad transaction let block_number = tx - .get::(err.tx)? + .get::(err.tx)? 
.ok_or(ProviderError::BlockNumberForTransactionIndexNotFound)?; // fetch the sealed header so we can use it in the sender recovery @@ -178,7 +180,7 @@ impl Stage for SenderRecoveryStage { .block_body_indices(unwind_to)? .ok_or(ProviderError::BlockBodyIndicesNotFound(unwind_to))? .last_tx_num(); - provider.unwind_table_by_num::(latest_tx_id)?; + provider.unwind_table_by_num::(latest_tx_id)?; Ok(UnwindOutput { checkpoint: StageCheckpoint::new(unwind_to) @@ -187,15 +189,11 @@ impl Stage for SenderRecoveryStage { } } +#[inline] fn recover_sender( - entry: Result<(RawKey, RawValue), DatabaseError>, + (tx_id, tx): (TxNumber, TransactionSignedNoHash), rlp_buf: &mut Vec, ) -> Result<(u64, Address), Box> { - let (tx_id, transaction) = - entry.map_err(|e| Box::new(SenderRecoveryStageError::StageError(e.into())))?; - let tx_id = tx_id.key().expect("key to be formated"); - - let tx = transaction.value().expect("value to be formated"); tx.transaction.encode_without_signature(rlp_buf); // We call [Signature::recover_signer_unchecked] because transactions run in the pipeline are @@ -219,11 +217,11 @@ fn stage_checkpoint( .and_then(|checkpoint| checkpoint.tx_number) .unwrap_or_default(); Ok(EntitiesCheckpoint { - // If `TxSenders` table was pruned, we will have a number of entries in it not matching - // the actual number of processed transactions. To fix that, we add the number of pruned - // `TxSenders` entries. - processed: provider.tx_ref().entries::()? as u64 + pruned_entries, - total: provider.tx_ref().entries::()? as u64, + // If `TransactionSenders` table was pruned, we will have a number of entries in it not + // matching the actual number of processed transactions. To fix that, we add the + // number of pruned `TransactionSenders` entries. + processed: provider.count_entries::()? as u64 + pruned_entries, + total: provider.count_entries::()? 
as u64, }) } @@ -249,6 +247,7 @@ struct FailedSenderRecoveryError { #[cfg(test)] mod tests { use assert_matches::assert_matches; + use reth_db::cursor::DbCursorRO; use reth_interfaces::test_utils::{ generators, generators::{random_block, random_block_range}, @@ -257,12 +256,12 @@ mod tests { stage::StageUnitCheckpoint, BlockNumber, PruneCheckpoint, PruneMode, SealedBlock, TransactionSigned, B256, }; - use reth_provider::{PruneCheckpointWriter, TransactionsProvider}; + use reth_provider::{providers::StaticFileWriter, PruneCheckpointWriter, TransactionsProvider}; use super::*; use crate::test_utils::{ - stage_test_suite_ext, ExecuteStageTestRunner, StageTestRunner, TestRunnerError, - TestStageDB, UnwindStageTestRunner, + stage_test_suite_ext, ExecuteStageTestRunner, StageTestRunner, StorageKind, + TestRunnerError, TestStageDB, UnwindStageTestRunner, }; stage_test_suite_ext!(SenderRecoveryTestRunner, sender_recovery); @@ -293,7 +292,10 @@ mod tests { ) }) .collect::>(); - runner.db.insert_blocks(blocks.iter(), None).expect("failed to insert blocks"); + runner + .db + .insert_blocks(blocks.iter(), StorageKind::Static) + .expect("failed to insert blocks"); let rx = runner.execute(input); @@ -327,9 +329,17 @@ mod tests { // Manually seed once with full input range let seed = random_block_range(&mut rng, stage_progress + 1..=previous_stage, B256::ZERO, 0..4); // set tx count range high enough to hit the threshold - runner.db.insert_blocks(seed.iter(), None).expect("failed to seed execution"); - - let total_transactions = runner.db.table::().unwrap().len() as u64; + runner + .db + .insert_blocks(seed.iter(), StorageKind::Static) + .expect("failed to seed execution"); + + let total_transactions = runner + .db + .factory + .static_file_provider() + .count_entries::() + .unwrap() as u64; let first_input = ExecInput { target: Some(previous_stage), @@ -353,7 +363,8 @@ mod tests { ExecOutput { checkpoint: StageCheckpoint::new(expected_progress).with_entities_stage_checkpoint( EntitiesCheckpoint { - processed: runner.db.table::().unwrap().len() as u64, + processed: runner.db.table::().unwrap().len() + as u64, total: total_transactions } ), @@ -388,7 +399,7 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, 0..=100, B256::ZERO, 0..10); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Static).expect("insert blocks"); let max_pruned_block = 30; let max_processed_block = 70; @@ -455,10 +466,11 @@ mod tests { /// # Panics /// - /// 1. If there are any entries in the [tables::TxSenders] table above a given block number. + /// 1. If there are any entries in the [tables::TransactionSenders] table above a given + /// block number. /// - /// 2. If the is no requested block entry in the bodies table, but [tables::TxSenders] is - /// not empty. + /// 2. If the is no requested block entry in the bodies table, but + /// [tables::TransactionSenders] is not empty. fn ensure_no_senders_by_block(&self, block: BlockNumber) -> Result<(), TestRunnerError> { let body_result = self .db @@ -467,11 +479,12 @@ mod tests { .block_body_indices(block)? 
.ok_or(ProviderError::BlockBodyIndicesNotFound(block)); match body_result { - Ok(body) => self - .db - .ensure_no_entry_above::(body.last_tx_num(), |key| key)?, + Ok(body) => self.db.ensure_no_entry_above::( + body.last_tx_num(), + |key| key, + )?, Err(_) => { - assert!(self.db.table_is_empty::()?); + assert!(self.db.table_is_empty::()?); } }; @@ -500,7 +513,7 @@ mod tests { let end = input.target(); let blocks = random_block_range(&mut rng, stage_progress..=end, B256::ZERO, 0..2); - self.db.insert_blocks(blocks.iter(), None)?; + self.db.insert_blocks(blocks.iter(), StorageKind::Static)?; Ok(blocks) } diff --git a/crates/stages/src/stages/total_difficulty.rs b/crates/stages/src/stages/total_difficulty.rs deleted file mode 100644 index 6e2c152c4a3f..000000000000 --- a/crates/stages/src/stages/total_difficulty.rs +++ /dev/null @@ -1,310 +0,0 @@ -use crate::{BlockErrorKind, ExecInput, ExecOutput, Stage, StageError, UnwindInput, UnwindOutput}; -use reth_db::{ - cursor::{DbCursorRO, DbCursorRW}, - database::Database, - tables, - transaction::{DbTx, DbTxMut}, - DatabaseError, -}; -use reth_interfaces::{consensus::Consensus, provider::ProviderError}; -use reth_primitives::{ - stage::{EntitiesCheckpoint, StageCheckpoint, StageId}, - U256, -}; -use reth_provider::DatabaseProviderRW; -use std::sync::Arc; -use tracing::*; - -/// The total difficulty stage. -/// -/// This stage walks over inserted headers and computes total difficulty -/// at each block. The entries are inserted into [`HeaderTD`][reth_db::tables::HeaderTD] -/// table. -#[derive(Debug, Clone)] -pub struct TotalDifficultyStage { - /// Consensus client implementation - consensus: Arc, - /// The number of table entries to commit at once - commit_threshold: u64, -} - -impl TotalDifficultyStage { - /// Create a new total difficulty stage - pub fn new(consensus: Arc) -> Self { - Self { consensus, commit_threshold: 100_000 } - } - - /// Set a commit threshold on total difficulty stage - pub fn with_commit_threshold(mut self, commit_threshold: u64) -> Self { - self.commit_threshold = commit_threshold; - self - } -} - -impl Stage for TotalDifficultyStage { - /// Return the id of the stage - fn id(&self) -> StageId { - StageId::TotalDifficulty - } - - /// Write total difficulty entries - fn execute( - &mut self, - provider: &DatabaseProviderRW, - input: ExecInput, - ) -> Result { - let tx = provider.tx_ref(); - if input.target_reached() { - return Ok(ExecOutput::done(input.checkpoint())) - } - - let (range, is_final_range) = input.next_block_range_with_threshold(self.commit_threshold); - let (start_block, end_block) = range.clone().into_inner(); - - debug!(target: "sync::stages::total_difficulty", start_block, end_block, "Commencing sync"); - - // Acquire cursor over total difficulty and headers tables - let mut cursor_td = tx.cursor_write::()?; - let mut cursor_headers = tx.cursor_read::()?; - - // Get latest total difficulty - let last_header_number = input.checkpoint().block_number; - let last_entry = cursor_td - .seek_exact(last_header_number)? - .ok_or(ProviderError::TotalDifficultyNotFound(last_header_number))?; - - let mut td: U256 = last_entry.1.into(); - debug!(target: "sync::stages::total_difficulty", ?td, block_number = last_header_number, "Last total difficulty entry"); - - // Walk over newly inserted headers, update & insert td - for entry in cursor_headers.walk_range(range)? 
{ - let (block_number, header) = entry?; - td += header.difficulty; - - self.consensus.validate_header_with_total_difficulty(&header, td).map_err(|error| { - StageError::Block { - block: Box::new(header.seal_slow()), - error: BlockErrorKind::Validation(error), - } - })?; - cursor_td.append(block_number, td.into())?; - } - - Ok(ExecOutput { - checkpoint: StageCheckpoint::new(end_block) - .with_entities_stage_checkpoint(stage_checkpoint(provider)?), - done: is_final_range, - }) - } - - /// Unwind the stage. - fn unwind( - &mut self, - provider: &DatabaseProviderRW, - input: UnwindInput, - ) -> Result { - let (_, unwind_to, _) = input.unwind_block_range_with_threshold(self.commit_threshold); - - provider.unwind_table_by_num::(unwind_to)?; - - Ok(UnwindOutput { - checkpoint: StageCheckpoint::new(unwind_to) - .with_entities_stage_checkpoint(stage_checkpoint(provider)?), - }) - } -} - -fn stage_checkpoint( - provider: &DatabaseProviderRW, -) -> Result { - Ok(EntitiesCheckpoint { - processed: provider.tx_ref().entries::()? as u64, - total: provider.tx_ref().entries::()? as u64, - }) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::test_utils::{ - stage_test_suite_ext, ExecuteStageTestRunner, StageTestRunner, TestRunnerError, - TestStageDB, UnwindStageTestRunner, - }; - use assert_matches::assert_matches; - use reth_interfaces::test_utils::{ - generators, - generators::{random_header, random_header_range}, - TestConsensus, - }; - use reth_primitives::{stage::StageUnitCheckpoint, BlockNumber, SealedHeader}; - use reth_provider::HeaderProvider; - - stage_test_suite_ext!(TotalDifficultyTestRunner, total_difficulty); - - #[tokio::test] - async fn execute_with_intermediate_commit() { - let threshold = 50; - let (stage_progress, previous_stage) = (1000, 1100); // input exceeds threshold - - let mut runner = TotalDifficultyTestRunner::default(); - runner.set_threshold(threshold); - - let first_input = ExecInput { - target: Some(previous_stage), - checkpoint: Some(StageCheckpoint::new(stage_progress)), - }; - - // Seed only once with full input range - runner.seed_execution(first_input).expect("failed to seed execution"); - - // Execute first time - let result = runner.execute(first_input).await.unwrap(); - let expected_progress = stage_progress + threshold; - assert_matches!( - result, - Ok(ExecOutput { checkpoint: StageCheckpoint { - block_number, - stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { - processed, - total - })) - }, done: false }) if block_number == expected_progress && processed == 1 + threshold && - total == runner.db.table::().unwrap().len() as u64 - ); - - // Execute second time - let second_input = ExecInput { - target: Some(previous_stage), - checkpoint: Some(StageCheckpoint::new(expected_progress)), - }; - let result = runner.execute(second_input).await.unwrap(); - assert_matches!( - result, - Ok(ExecOutput { checkpoint: StageCheckpoint { - block_number, - stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { - processed, - total - })) - }, done: true }) if block_number == previous_stage && processed == total && - total == runner.db.table::().unwrap().len() as u64 - ); - - assert!(runner.validate_execution(first_input, result.ok()).is_ok(), "validation failed"); - } - - struct TotalDifficultyTestRunner { - db: TestStageDB, - consensus: Arc, - commit_threshold: u64, - } - - impl Default for TotalDifficultyTestRunner { - fn default() -> Self { - Self { - db: Default::default(), - consensus: Arc::new(TestConsensus::default()), - 
commit_threshold: 500, - } - } - } - - impl StageTestRunner for TotalDifficultyTestRunner { - type S = TotalDifficultyStage; - - fn db(&self) -> &TestStageDB { - &self.db - } - - fn stage(&self) -> Self::S { - TotalDifficultyStage { - consensus: self.consensus.clone(), - commit_threshold: self.commit_threshold, - } - } - } - - impl ExecuteStageTestRunner for TotalDifficultyTestRunner { - type Seed = Vec; - - fn seed_execution(&mut self, input: ExecInput) -> Result { - let mut rng = generators::rng(); - let start = input.checkpoint().block_number; - let head = random_header(&mut rng, start, None); - self.db.insert_headers(std::iter::once(&head))?; - self.db.commit(|tx| { - let td: U256 = tx - .cursor_read::()? - .last()? - .map(|(_, v)| v) - .unwrap_or_default() - .into(); - tx.put::(head.number, (td + head.difficulty).into())?; - Ok(()) - })?; - - // use previous progress as seed size - let end = input.target.unwrap_or_default() + 1; - - if start + 1 >= end { - return Ok(Vec::default()) - } - - let mut headers = random_header_range(&mut rng, start + 1..end, head.hash()); - self.db.insert_headers(headers.iter())?; - headers.insert(0, head); - Ok(headers) - } - - /// Validate stored headers - fn validate_execution( - &self, - input: ExecInput, - output: Option, - ) -> Result<(), TestRunnerError> { - let initial_stage_progress = input.checkpoint().block_number; - match output { - Some(output) if output.checkpoint.block_number > initial_stage_progress => { - let provider = self.db.factory.provider()?; - - let mut header_cursor = provider.tx_ref().cursor_read::()?; - let (_, mut current_header) = header_cursor - .seek_exact(initial_stage_progress)? - .expect("no initial header"); - let mut td: U256 = provider - .header_td_by_number(initial_stage_progress)? - .expect("no initial td"); - - while let Some((next_key, next_header)) = header_cursor.next()? 
{ - assert_eq!(current_header.number + 1, next_header.number); - td += next_header.difficulty; - assert_eq!( - provider.header_td_by_number(next_key)?.map(Into::into), - Some(td) - ); - current_header = next_header; - } - } - _ => self.check_no_td_above(initial_stage_progress)?, - }; - Ok(()) - } - } - - impl UnwindStageTestRunner for TotalDifficultyTestRunner { - fn validate_unwind(&self, input: UnwindInput) -> Result<(), TestRunnerError> { - self.check_no_td_above(input.unwind_to) - } - } - - impl TotalDifficultyTestRunner { - fn check_no_td_above(&self, block: BlockNumber) -> Result<(), TestRunnerError> { - self.db.ensure_no_entry_above::(block, |num| num)?; - Ok(()) - } - - fn set_threshold(&mut self, new_threshold: u64) { - self.commit_threshold = new_threshold; - } - } -} diff --git a/crates/stages/src/stages/tx_lookup.rs b/crates/stages/src/stages/tx_lookup.rs index d5c9202bab78..a619fe709b57 100644 --- a/crates/stages/src/stages/tx_lookup.rs +++ b/crates/stages/src/stages/tx_lookup.rs @@ -1,44 +1,52 @@ use crate::{ExecInput, ExecOutput, Stage, StageError, UnwindInput, UnwindOutput}; -use rayon::prelude::*; +use num_traits::Zero; use reth_db::{ cursor::{DbCursorRO, DbCursorRW}, database::Database, tables, transaction::{DbTx, DbTxMut}, + RawKey, RawValue, }; +use reth_etl::Collector; use reth_interfaces::provider::ProviderError; use reth_primitives::{ stage::{EntitiesCheckpoint, StageCheckpoint, StageId}, - PruneCheckpoint, PruneMode, PruneSegment, + PruneCheckpoint, PruneMode, PrunePurpose, PruneSegment, TxHash, TxNumber, }; use reth_provider::{ - BlockReader, DatabaseProviderRW, PruneCheckpointReader, PruneCheckpointWriter, - TransactionsProviderExt, + BlockReader, DatabaseProviderRW, PruneCheckpointReader, PruneCheckpointWriter, StatsReader, + TransactionsProvider, TransactionsProviderExt, }; +use std::sync::Arc; +use tempfile::TempDir; use tracing::*; /// The transaction lookup stage. /// /// This stage walks over the bodies table, and sets the transaction hash of each transaction in a /// block to the corresponding `BlockNumber` at each block. This is written to the -/// [`tables::TxHashNumber`] This is used for looking up changesets via the transaction hash. +/// [`tables::TransactionHashNumbers`] This is used for looking up changesets via the transaction +/// hash. +/// +/// It uses [`reth_etl::Collector`] to collect all entries before finally writing them to disk. #[derive(Debug, Clone)] pub struct TransactionLookupStage { - /// The number of lookup entries to commit at once - commit_threshold: u64, + /// The maximum number of lookup entries to hold in memory before pushing them to + /// [`reth_etl::Collector`]. + chunk_size: u64, prune_mode: Option, } impl Default for TransactionLookupStage { fn default() -> Self { - Self { commit_threshold: 5_000_000, prune_mode: None } + Self { chunk_size: 5_000_000, prune_mode: None } } } impl TransactionLookupStage { /// Create new instance of [TransactionLookupStage]. - pub fn new(commit_threshold: u64, prune_mode: Option) -> Self { - Self { commit_threshold, prune_mode } + pub fn new(chunk_size: u64, prune_mode: Option) -> Self { + Self { chunk_size, prune_mode } } } @@ -56,7 +64,13 @@ impl Stage for TransactionLookupStage { ) -> Result { if let Some((target_prunable_block, prune_mode)) = self .prune_mode - .map(|mode| mode.prune_target_block(input.target(), PruneSegment::TransactionLookup)) + .map(|mode| { + mode.prune_target_block( + input.target(), + PruneSegment::TransactionLookup, + PrunePurpose::User, + ) + }) .transpose()? 
.flatten() { @@ -86,43 +100,73 @@ impl Stage for TransactionLookupStage { return Ok(ExecOutput::done(input.checkpoint())) } - let (tx_range, block_range, is_final_range) = - input.next_block_range_with_transaction_threshold(provider, self.commit_threshold)?; - let end_block = *block_range.end(); + // 500MB temporary files + let mut hash_collector: Collector = + Collector::new(Arc::new(TempDir::new()?), 500 * (1024 * 1024)); - debug!(target: "sync::stages::transaction_lookup", ?tx_range, "Updating transaction lookup"); + debug!( + target: "sync::stages::transaction_lookup", + tx_range = ?input.checkpoint().block_number..=input.target(), + "Updating transaction lookup" + ); - let mut tx_list = provider.transaction_hashes_by_range(tx_range)?; + loop { + let (tx_range, block_range, is_final_range) = + input.next_block_range_with_transaction_threshold(provider, self.chunk_size)?; - // Sort before inserting the reverse lookup for hash -> tx_id. - tx_list.par_sort_unstable_by(|txa, txb| txa.0.cmp(&txb.0)); + let end_block = *block_range.end(); - let tx = provider.tx_ref(); - let mut txhash_cursor = tx.cursor_write::()?; - - // If the last inserted element in the database is equal or bigger than the first - // in our set, then we need to insert inside the DB. If it is smaller then last - // element in the DB, we can append to the DB. - // Append probably only ever happens during sync, on the first table insertion. - let insert = tx_list - .first() - .zip(txhash_cursor.last()?) - .map(|((first, _), (last, _))| first <= &last) - .unwrap_or_default(); - // if txhash_cursor.last() is None we will do insert. `zip` would return none if any item is - // none. if it is some and if first is smaller than last, we will do append. - for (tx_hash, id) in tx_list { - if insert { - txhash_cursor.insert(tx_hash, id)?; - } else { - txhash_cursor.append(tx_hash, id)?; + debug!(target: "sync::stages::transaction_lookup", ?tx_range, "Calculating transaction hashes"); + + for (key, value) in provider.transaction_hashes_by_range(tx_range)? 
{ + hash_collector.insert(key, value); + } + + input.checkpoint = Some( + StageCheckpoint::new(end_block) + .with_entities_stage_checkpoint(stage_checkpoint(provider)?), + ); + + if is_final_range { + let append_only = + provider.count_entries::()?.is_zero(); + let mut txhash_cursor = provider + .tx_ref() + .cursor_write::>()?; + + let total_hashes = hash_collector.len(); + let interval = (total_hashes / 10).max(1); + for (index, hash_to_number) in hash_collector.iter()?.enumerate() { + let (hash, number) = hash_to_number?; + if index > 0 && index % interval == 0 { + debug!( + target: "sync::stages::transaction_lookup", + ?append_only, + progress = format!("{:.2}%", (index as f64 / total_hashes as f64) * 100.0), + "Inserting hashes" + ); + } + + if append_only { + txhash_cursor.append( + RawKey::::from_vec(hash), + RawValue::::from_vec(number), + )?; + } else { + txhash_cursor.insert( + RawKey::::from_vec(hash), + RawValue::::from_vec(number), + )?; + } + } + break } } Ok(ExecOutput { - checkpoint: StageCheckpoint::new(end_block) + checkpoint: StageCheckpoint::new(input.target()) .with_entities_stage_checkpoint(stage_checkpoint(provider)?), - done: is_final_range, + done: true, }) } @@ -133,12 +177,12 @@ impl Stage for TransactionLookupStage { input: UnwindInput, ) -> Result { let tx = provider.tx_ref(); - let (range, unwind_to, _) = input.unwind_block_range_with_threshold(self.commit_threshold); + let (range, unwind_to, _) = input.unwind_block_range_with_threshold(self.chunk_size); // Cursors to unwind tx hash to number let mut body_cursor = tx.cursor_read::()?; - let mut tx_hash_number_cursor = tx.cursor_write::()?; - let mut transaction_cursor = tx.cursor_read::()?; + let mut tx_hash_number_cursor = tx.cursor_write::()?; + let static_file_provider = provider.static_file_provider(); let mut rev_walker = body_cursor.walk_back(Some(*range.end()))?; while let Some((number, body)) = rev_walker.next().transpose()? { if number <= unwind_to { @@ -148,7 +192,7 @@ impl Stage for TransactionLookupStage { // Delete all transactions that belong to this block for tx_id in body.tx_num_range() { // First delete the transaction and hash to id mapping - if let Some((_, transaction)) = transaction_cursor.seek_exact(tx_id)? { + if let Some(transaction) = static_file_provider.transaction_by_id(tx_id)? { if tx_hash_number_cursor.seek_exact(transaction.hash())?.is_some() { tx_hash_number_cursor.delete_current()?; } @@ -173,11 +217,12 @@ fn stage_checkpoint( .map(|tx_number| tx_number + 1) .unwrap_or_default(); Ok(EntitiesCheckpoint { - // If `TxHashNumber` table was pruned, we will have a number of entries in it not matching - // the actual number of processed transactions. To fix that, we add the number of pruned - // `TxHashNumber` entries. - processed: provider.tx_ref().entries::()? as u64 + pruned_entries, - total: provider.tx_ref().entries::()? as u64, + // If `TransactionHashNumbers` table was pruned, we will have a number of entries in it not + // matching the actual number of processed transactions. To fix that, we add the + // number of pruned `TransactionHashNumbers` entries. + processed: provider.count_entries::()? as u64 + + pruned_entries, + total: provider.count_entries::()? 
as u64, }) } @@ -185,8 +230,8 @@ fn stage_checkpoint( mod tests { use super::*; use crate::test_utils::{ - stage_test_suite_ext, ExecuteStageTestRunner, StageTestRunner, TestRunnerError, - TestStageDB, UnwindStageTestRunner, + stage_test_suite_ext, ExecuteStageTestRunner, StageTestRunner, StorageKind, + TestRunnerError, TestStageDB, UnwindStageTestRunner, }; use assert_matches::assert_matches; use reth_interfaces::test_utils::{ @@ -194,7 +239,7 @@ mod tests { generators::{random_block, random_block_range}, }; use reth_primitives::{stage::StageUnitCheckpoint, BlockNumber, SealedBlock, B256}; - use reth_provider::TransactionsProvider; + use reth_provider::providers::StaticFileWriter; use std::ops::Sub; // Implement stage test suite. @@ -225,7 +270,10 @@ mod tests { ) }) .collect::>(); - runner.db.insert_blocks(blocks.iter(), None).expect("failed to insert blocks"); + runner + .db + .insert_blocks(blocks.iter(), StorageKind::Static) + .expect("failed to insert blocks"); let rx = runner.execute(input); @@ -241,79 +289,13 @@ mod tests { total })) }, done: true }) if block_number == previous_stage && processed == total && - total == runner.db.table::().unwrap().len() as u64 + total == runner.db.factory.static_file_provider().count_entries::().unwrap() as u64 ); // Validate the stage execution assert!(runner.validate_execution(input, result.ok()).is_ok(), "execution validation"); } - /// Execute the stage twice with input range that exceeds the commit threshold - #[tokio::test] - async fn execute_intermediate_commit_transaction_lookup() { - let threshold = 50; - let mut runner = TransactionLookupTestRunner::default(); - runner.set_commit_threshold(threshold); - let (stage_progress, previous_stage) = (1000, 1100); // input exceeds threshold - let first_input = ExecInput { - target: Some(previous_stage), - checkpoint: Some(StageCheckpoint::new(stage_progress)), - }; - let mut rng = generators::rng(); - - // Seed only once with full input range - let seed = - random_block_range(&mut rng, stage_progress + 1..=previous_stage, B256::ZERO, 0..4); // set tx count range high enough to hit the threshold - runner.db.insert_blocks(seed.iter(), None).expect("failed to seed execution"); - - let total_txs = runner.db.table::().unwrap().len() as u64; - - // Execute first time - let result = runner.execute(first_input).await.unwrap(); - let mut tx_count = 0; - let expected_progress = seed - .iter() - .find(|x| { - tx_count += x.body.len(); - tx_count as u64 > threshold - }) - .map(|x| x.number) - .unwrap_or(previous_stage); - assert_matches!(result, Ok(_)); - assert_eq!( - result.unwrap(), - ExecOutput { - checkpoint: StageCheckpoint::new(expected_progress).with_entities_stage_checkpoint( - EntitiesCheckpoint { - processed: runner.db.table::().unwrap().len() as u64, - total: total_txs - } - ), - done: false - } - ); - - // Execute second time to completion - runner.set_commit_threshold(u64::MAX); - let second_input = ExecInput { - target: Some(previous_stage), - checkpoint: Some(StageCheckpoint::new(expected_progress)), - }; - let result = runner.execute(second_input).await.unwrap(); - assert_matches!(result, Ok(_)); - assert_eq!( - result.as_ref().unwrap(), - &ExecOutput { - checkpoint: StageCheckpoint::new(previous_stage).with_entities_stage_checkpoint( - EntitiesCheckpoint { processed: total_txs, total: total_txs } - ), - done: true - } - ); - - assert!(runner.validate_execution(first_input, result.ok()).is_ok(), "validation failed"); - } - #[tokio::test] async fn execute_pruned_transaction_lookup() { let 
(previous_stage, prune_target, stage_progress) = (500, 400, 100); @@ -329,7 +311,10 @@ mod tests { // Seed only once with full input range let seed = random_block_range(&mut rng, stage_progress + 1..=previous_stage, B256::ZERO, 0..2); - runner.db.insert_blocks(seed.iter(), None).expect("failed to seed execution"); + runner + .db + .insert_blocks(seed.iter(), StorageKind::Static) + .expect("failed to seed execution"); runner.set_prune_mode(PruneMode::Before(prune_target)); @@ -347,7 +332,7 @@ mod tests { total })) }, done: true }) if block_number == previous_stage && processed == total && - total == runner.db.table::().unwrap().len() as u64 + total == runner.db.factory.static_file_provider().count_entries::().unwrap() as u64 ); // Validate the stage execution @@ -360,7 +345,7 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, 0..=100, B256::ZERO, 0..10); - db.insert_blocks(blocks.iter(), None).expect("insert blocks"); + db.insert_blocks(blocks.iter(), StorageKind::Static).expect("insert blocks"); let max_pruned_block = 30; let max_processed_block = 70; @@ -411,32 +396,28 @@ mod tests { struct TransactionLookupTestRunner { db: TestStageDB, - commit_threshold: u64, + chunk_size: u64, prune_mode: Option, } impl Default for TransactionLookupTestRunner { fn default() -> Self { - Self { db: TestStageDB::default(), commit_threshold: 1000, prune_mode: None } + Self { db: TestStageDB::default(), chunk_size: 1000, prune_mode: None } } } impl TransactionLookupTestRunner { - fn set_commit_threshold(&mut self, threshold: u64) { - self.commit_threshold = threshold; - } - fn set_prune_mode(&mut self, prune_mode: PruneMode) { self.prune_mode = Some(prune_mode); } /// # Panics /// - /// 1. If there are any entries in the [tables::TxHashNumber] table above a given block - /// number. + /// 1. If there are any entries in the [tables::TransactionHashNumbers] table above a given + /// block number. /// - /// 2. If the is no requested block entry in the bodies table, but [tables::TxHashNumber] is - /// not empty. + /// 2. If the is no requested block entry in the bodies table, but + /// [tables::TransactionHashNumbers] is not empty. fn ensure_no_hash_by_block(&self, number: BlockNumber) -> Result<(), TestRunnerError> { let body_result = self .db @@ -445,12 +426,14 @@ mod tests { .block_body_indices(number)? .ok_or(ProviderError::BlockBodyIndicesNotFound(number)); match body_result { - Ok(body) => self.db.ensure_no_entry_above_by_value::( - body.last_tx_num(), - |key| key, - )?, + Ok(body) => { + self.db.ensure_no_entry_above_by_value::( + body.last_tx_num(), + |key| key, + )? 
+ } Err(_) => { - assert!(self.db.table_is_empty::()?); + assert!(self.db.table_is_empty::()?); } }; @@ -466,10 +449,7 @@ mod tests { } fn stage(&self) -> Self::S { - TransactionLookupStage { - commit_threshold: self.commit_threshold, - prune_mode: self.prune_mode, - } + TransactionLookupStage { chunk_size: self.chunk_size, prune_mode: self.prune_mode } } } @@ -482,7 +462,7 @@ mod tests { let mut rng = generators::rng(); let blocks = random_block_range(&mut rng, stage_progress + 1..=end, B256::ZERO, 0..2); - self.db.insert_blocks(blocks.iter(), None)?; + self.db.insert_blocks(blocks.iter(), StorageKind::Static)?; Ok(blocks) } @@ -498,7 +478,11 @@ mod tests { if let Some((target_prunable_block, _)) = self .prune_mode .map(|mode| { - mode.prune_target_block(input.target(), PruneSegment::TransactionLookup) + mode.prune_target_block( + input.target(), + PruneSegment::TransactionLookup, + PrunePurpose::User, + ) }) .transpose() .expect("prune target block for transaction lookup") diff --git a/crates/stages/src/test_utils/macros.rs b/crates/stages/src/test_utils/macros.rs index 8cc3e9cda1b1..0ffb16f08611 100644 --- a/crates/stages/src/test_utils/macros.rs +++ b/crates/stages/src/test_utils/macros.rs @@ -13,6 +13,8 @@ macro_rules! stage_test_suite { // Run stage execution let result = runner.execute(input).await; + runner.db().factory.static_file_provider().commit().unwrap(); + // Check that the result is returned and the stage does not panic. // The return result with empty db is stage-specific. assert_matches::assert_matches!(result, Ok(_)); @@ -44,6 +46,8 @@ macro_rules! stage_test_suite { // Assert the successful result let result = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); + assert_matches::assert_matches!( result, Ok(ExecOutput { done, checkpoint }) @@ -72,6 +76,8 @@ macro_rules! stage_test_suite { // Run stage unwind let rx = runner.unwind(input).await; + runner.db().factory.static_file_provider().commit().unwrap(); + assert_matches::assert_matches!( rx, Ok(UnwindOutput { checkpoint }) if checkpoint.block_number == input.unwind_to @@ -104,6 +110,8 @@ macro_rules! stage_test_suite { // Assert the successful execution result let result = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); + assert_matches::assert_matches!( result, Ok(ExecOutput { done, checkpoint }) @@ -171,6 +179,8 @@ macro_rules! 
stage_test_suite_ext { // Assert the successful result let result = rx.await.unwrap(); + runner.db().factory.static_file_provider().commit().unwrap(); + assert_matches::assert_matches!( result, Ok(ExecOutput { done, checkpoint }) diff --git a/crates/stages/src/test_utils/mod.rs b/crates/stages/src/test_utils/mod.rs index f48aa46d5003..dd788bca74e3 100644 --- a/crates/stages/src/test_utils/mod.rs +++ b/crates/stages/src/test_utils/mod.rs @@ -13,7 +13,7 @@ pub(crate) use runner::{ }; mod test_db; -pub use test_db::TestStageDB; +pub use test_db::{StorageKind, TestStageDB}; mod stage; pub use stage::TestStage; diff --git a/crates/stages/src/test_utils/test_db.rs b/crates/stages/src/test_utils/test_db.rs index 54af31db0cc5..c349137f4a4f 100644 --- a/crates/stages/src/test_utils/test_db.rs +++ b/crates/stages/src/test_utils/test_db.rs @@ -5,7 +5,9 @@ use reth_db::{ models::{AccountBeforeTx, StoredBlockBodyIndices}, table::Table, tables, - test_utils::{create_test_rw_db, create_test_rw_db_with_path, TempDatabase}, + test_utils::{ + create_test_rw_db, create_test_rw_db_with_path, create_test_static_files_dir, TempDatabase, + }, transaction::{DbTx, DbTxMut}, DatabaseEnv, DatabaseError as DbError, }; @@ -14,7 +16,10 @@ use reth_primitives::{ keccak256, Account, Address, BlockNumber, Receipt, SealedBlock, SealedHeader, StorageEntry, TxHash, TxNumber, B256, MAINNET, U256, }; -use reth_provider::{HistoryWriter, ProviderFactory}; +use reth_provider::{ + providers::{StaticFileProviderRWRefMut, StaticFileWriter}, + HistoryWriter, ProviderError, ProviderFactory, +}; use std::{collections::BTreeMap, path::Path, sync::Arc}; /// Test database that is used for testing stage implementations. @@ -26,13 +31,27 @@ pub struct TestStageDB { impl Default for TestStageDB { /// Create a new instance of [TestStageDB] fn default() -> Self { - Self { factory: ProviderFactory::new(create_test_rw_db(), MAINNET.clone()) } + Self { + factory: ProviderFactory::new( + create_test_rw_db(), + MAINNET.clone(), + create_test_static_files_dir(), + ) + .unwrap(), + } } } impl TestStageDB { pub fn new(path: &Path) -> Self { - Self { factory: ProviderFactory::new(create_test_rw_db_with_path(path), MAINNET.clone()) } + Self { + factory: ProviderFactory::new( + create_test_rw_db_with_path(path), + MAINNET.clone(), + create_test_static_files_dir(), + ) + .unwrap(), + } } /// Invoke a callback with transaction committing it afterwards @@ -112,75 +131,126 @@ impl TestStageDB { }) } - /// Inserts a single [SealedHeader] into the corresponding tables of the headers stage. - fn insert_header(tx: &TX, header: &SealedHeader) -> Result<(), DbError> { - tx.put::(header.number, header.hash())?; + /// Insert header to static file if `writer` exists, otherwise to DB. 
+ pub fn insert_header( + writer: Option<&mut StaticFileProviderRWRefMut<'_>>, + tx: &TX, + header: &SealedHeader, + td: U256, + ) -> ProviderResult<()> { + if let Some(writer) = writer { + writer.append_header(header.header().clone(), td, header.hash())?; + } else { + tx.put::(header.number, header.hash())?; + tx.put::(header.number, td.into())?; + tx.put::(header.number, header.header().clone())?; + } + tx.put::(header.hash(), header.number)?; - tx.put::(header.number, header.clone().unseal()) + Ok(()) + } + + fn insert_headers_inner<'a, I, const TD: bool>(&self, headers: I) -> ProviderResult<()> + where + I: Iterator, + { + let provider = self.factory.static_file_provider(); + let mut writer = provider.latest_writer(reth_primitives::StaticFileSegment::Headers)?; + let tx = self.factory.provider_rw()?.into_tx(); + let mut td = U256::ZERO; + + for header in headers { + if TD { + td += header.difficulty; + } + Self::insert_header(Some(&mut writer), &tx, header, td)?; + } + + writer.commit()?; + tx.commit()?; + + Ok(()) } - /// Insert ordered collection of [SealedHeader] into the corresponding tables + /// Insert ordered collection of [SealedHeader] into the corresponding static file and tables /// that are supposed to be populated by the headers stage. pub fn insert_headers<'a, I>(&self, headers: I) -> ProviderResult<()> where I: Iterator, { - self.commit(|tx| { - Ok(headers.into_iter().try_for_each(|header| Self::insert_header(tx, header))?) - }) + self.insert_headers_inner::(headers) } - /// Inserts total difficulty of headers into the corresponding tables. + /// Inserts total difficulty of headers into the corresponding static file and tables. /// /// Superset functionality of [TestStageDB::insert_headers]. pub fn insert_headers_with_td<'a, I>(&self, headers: I) -> ProviderResult<()> where I: Iterator, { - self.commit(|tx| { - let mut td = U256::ZERO; - headers.into_iter().try_for_each(|header| { - Self::insert_header(tx, header)?; - td += header.difficulty; - Ok(tx.put::(header.number, td.into())?) - }) - }) + self.insert_headers_inner::(headers) } /// Insert ordered collection of [SealedBlock] into corresponding tables. /// Superset functionality of [TestStageDB::insert_headers]. /// + /// If tx_offset is set to `None`, then transactions will be stored on static files, otherwise + /// database. + /// /// Assumes that there's a single transition for each transaction (i.e. no block rewards). - pub fn insert_blocks<'a, I>(&self, blocks: I, tx_offset: Option) -> ProviderResult<()> + pub fn insert_blocks<'a, I>(&self, blocks: I, storage_kind: StorageKind) -> ProviderResult<()> where I: Iterator, { - self.commit(|tx| { - let mut next_tx_num = tx_offset.unwrap_or_default(); - - blocks.into_iter().try_for_each(|block| { - Self::insert_header(tx, &block.header)?; - // Insert into body tables. 
- let block_body_indices = StoredBlockBodyIndices { - first_tx_num: next_tx_num, - tx_count: block.body.len() as u64, - }; - - if !block.body.is_empty() { - tx.put::( - block_body_indices.last_tx_num(), - block.number, - )?; + let provider = self.factory.static_file_provider(); + + let mut txs_writer = storage_kind.is_static().then(|| { + provider.latest_writer(reth_primitives::StaticFileSegment::Transactions).unwrap() + }); + + let mut headers_writer = + provider.latest_writer(reth_primitives::StaticFileSegment::Headers)?; + let tx = self.factory.provider_rw().unwrap().into_tx(); + + let mut next_tx_num = storage_kind.tx_offset(); + blocks.into_iter().try_for_each(|block| { + Self::insert_header(Some(&mut headers_writer), &tx, &block.header, U256::ZERO)?; + + // Insert into body tables. + let block_body_indices = StoredBlockBodyIndices { + first_tx_num: next_tx_num, + tx_count: block.body.len() as u64, + }; + + if !block.body.is_empty() { + tx.put::( + block_body_indices.last_tx_num(), + block.number, + )?; + } + tx.put::(block.number, block_body_indices)?; + + let res = block.body.iter().try_for_each(|body_tx| { + if let Some(txs_writer) = &mut txs_writer { + txs_writer.append_transaction(next_tx_num, body_tx.clone().into())?; + } else { + tx.put::(next_tx_num, body_tx.clone().into())? } - tx.put::(block.number, block_body_indices)?; + next_tx_num += 1; + Ok::<(), ProviderError>(()) + }); - block.body.iter().try_for_each(|body_tx| { - tx.put::(next_tx_num, body_tx.clone().into())?; - next_tx_num += 1; - Ok(()) - }) - }) - }) + if let Some(txs_writer) = &mut txs_writer { + txs_writer.increment_block(reth_primitives::StaticFileSegment::Transactions)?; + } + res + })?; + + tx.commit()?; + if let Some(txs_writer) = &mut txs_writer { + txs_writer.commit()?; + } + headers_writer.commit() } pub fn insert_tx_hash_numbers(&self, tx_hash_numbers: I) -> ProviderResult<()> @@ -190,7 +260,7 @@ impl TestStageDB { self.commit(|tx| { tx_hash_numbers.into_iter().try_for_each(|(tx_hash, tx_num)| { // Insert into tx hash numbers table. - Ok(tx.put::(tx_hash, tx_num)?) + Ok(tx.put::(tx_hash, tx_num)?) }) }) } @@ -215,7 +285,7 @@ impl TestStageDB { self.commit(|tx| { transaction_senders.into_iter().try_for_each(|(tx_num, sender)| { // Insert into receipts table. - Ok(tx.put::(tx_num, sender)?) + Ok(tx.put::(tx_num, sender)?) }) }) } @@ -232,7 +302,7 @@ impl TestStageDB { // Insert into account tables. tx.put::(address, account)?; - tx.put::(hashed_address, account)?; + tx.put::(hashed_address, account)?; // Insert into storage tables. storage.into_iter().filter(|e| e.value != U256::ZERO).try_for_each(|entry| { @@ -248,7 +318,7 @@ impl TestStageDB { } cursor.upsert(address, entry)?; - let mut cursor = tx.cursor_dup_write::()?; + let mut cursor = tx.cursor_dup_write::()?; if cursor .seek_by_key_subkey(hashed_address, hashed_entry.key)? .filter(|e| e.key == hashed_entry.key) @@ -279,7 +349,7 @@ impl TestStageDB { changeset.into_iter().try_for_each(|(address, old_account, old_storage)| { let block = offset + block as u64; // Insert into account changeset. - tx.put::( + tx.put::( block, AccountBeforeTx { address, info: Some(old_account) }, )?; @@ -288,7 +358,7 @@ impl TestStageDB { // Insert into storage changeset. old_storage.into_iter().try_for_each(|entry| { - Ok(tx.put::(block_address, entry)?) + Ok(tx.put::(block_address, entry)?) }) }) }) @@ -319,3 +389,28 @@ impl TestStageDB { Ok(()) } } + +/// Used to identify where to store data when setting up a test. 
+#[derive(Debug)] +pub enum StorageKind { + Database(Option), + Static, +} + +impl StorageKind { + #[allow(dead_code)] + fn is_database(&self) -> bool { + matches!(self, Self::Database(_)) + } + + fn is_static(&self) -> bool { + matches!(self, Self::Static) + } + + fn tx_offset(&self) -> u64 { + if let Self::Database(offset) = self { + return offset.unwrap_or_default() + } + 0 + } +} diff --git a/crates/snapshot/Cargo.toml b/crates/static-file/Cargo.toml similarity index 83% rename from crates/snapshot/Cargo.toml rename to crates/static-file/Cargo.toml index a082c01135e3..8c7128455cb0 100644 --- a/crates/snapshot/Cargo.toml +++ b/crates/static-file/Cargo.toml @@ -1,12 +1,12 @@ [package] -name = "reth-snapshot" +name = "reth-static-file" version.workspace = true edition.workspace = true rust-version.workspace = true license.workspace = true homepage.workspace = true repository.workspace = true -description = "Snapshotting implementation" +description = "Static file producer implementation" [lints] workspace = true @@ -18,14 +18,17 @@ reth-db.workspace = true reth-provider.workspace = true reth-interfaces.workspace = true reth-nippy-jar.workspace = true +reth-tokio-util.workspace = true # async tokio = { workspace = true, features = ["sync"] } +tokio-stream.workspace = true # misc thiserror.workspace = true tracing.workspace = true clap = { workspace = true, features = ["derive"], optional = true } +rayon.workspace = true [dev-dependencies] # reth diff --git a/crates/static-file/README.md b/crates/static-file/README.md new file mode 100644 index 000000000000..1d455475a595 --- /dev/null +++ b/crates/static-file/README.md @@ -0,0 +1,88 @@ +# StaticFile + +## Overview + +Data that has reached a finalized state and won't undergo further changes (essentially frozen) should be read without concerns of modification. This makes it unsuitable for traditional databases. + +This crate aims to copy this data from the current database to multiple static files, aggregated by block ranges. At every 500_000th block new static files are created. + +Below are two diagrams illustrating the processes of creating static files (custom format: `NippyJar`) and querying them. A glossary is also provided to explain the different (linked) components involved in these processes. + +
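Before the diagrams, a quick sketch of the block-range bucketing described above. The constant and function names below are illustrative only (the crate's own helper for this is `find_fixed_range` in `reth-primitives`, used by the segment code later in this diff); this is just the arithmetic, assuming 500_000 blocks per static file:

```rust
// Illustrative only: names are made up for the example; reth keeps its own
// helper (`find_fixed_range`) in `reth-primitives`.
const BLOCKS_PER_STATIC_FILE: u64 = 500_000;

/// Inclusive block range of the static file that `block` belongs to.
fn fixed_range(block: u64) -> std::ops::RangeInclusive<u64> {
    let start = (block / BLOCKS_PER_STATIC_FILE) * BLOCKS_PER_STATIC_FILE;
    start..=start + BLOCKS_PER_STATIC_FILE - 1
}

fn main() {
    assert_eq!(fixed_range(499_999), 0..=499_999);       // first file
    assert_eq!(fixed_range(500_000), 500_000..=999_999); // second file
}
```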
+ Creation diagram (StaticFileProducer) + +```mermaid +graph TD; + I("BLOCK_HEIGHT % 500_000 == 0")--triggers-->SP(StaticFileProducer) + SP --> |triggers| SH["create_static_file(block_range, StaticFileSegment::Headers)"] + SP --> |triggers| ST["create_static_file(block_range, StaticFileSegment::Transactions)"] + SP --> |triggers| SR["create_static_file(block_range, StaticFileSegment::Receipts)"] + SP --> |triggers| ETC["create_static_file(block_range, ...)"] + SH --> CS["create_static_file::< T >(DatabaseCursor)"] + ST --> CS + SR --> CS + ETC --> CS + CS --> |create| IF(NippyJar::InclusionFilters) + CS -- iterates --> DC(DatabaseCursor) -->HN{HasNext} + HN --> |true| NJC(NippyJar::Compression) + NJC --> HN + NJC --store--> NJ + HN --> |false| NJ + IF --store--> NJ(NippyJar) + NJ --freeze--> F(File) + F--"on success"--> SP1(StaticFileProducer) + SP1 --"sends BLOCK_HEIGHT"--> HST(HighestStaticFileTracker) + HST --"read by"-->Pruner + HST --"read by"-->DatabaseProvider + HST --"read by"-->StaticFileProvider + HST --"read by"-->ProviderFactory + +```
+ + +
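To connect the creation diagram to code, here is a minimal sketch of driving the producer: compute the per-segment targets for a finalized height, then run them. The calls follow the `StaticFileProducer` code added later in this diff, but the generic `<DB>` parameters are written out by hand (the diff rendering drops them), so treat this as a sketch rather than a verbatim API reference:

```rust
use reth_db::database::Database;
use reth_interfaces::RethResult;
use reth_primitives::static_file::HighestStaticFiles;
use reth_static_file::StaticFileProducer;

/// Sketch: ask the producer which block ranges still need freezing up to a
/// finalized height, then run it. Error handling and metrics are elided.
fn freeze_up_to<DB: Database>(
    producer: &mut StaticFileProducer<DB>,
    finalized: u64,
) -> RethResult<()> {
    let targets = producer.get_static_file_targets(HighestStaticFiles {
        headers: Some(finalized),
        receipts: Some(finalized),
        transactions: Some(finalized),
    })?;

    if targets.any() {
        // Copies data into static files; deleting it from the database is the
        // pruner's job, not the producer's.
        producer.run(targets)?;
    }
    Ok(())
}
```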
+ Query diagram (Provider) + +```mermaid +graph TD; + RPC-->P + P("Provider::header(block_number)")-->PF(ProviderFactory) + PF--shares-->SP1("Arc(StaticFileProvider)") + SP1--shares-->PD(DatabaseProvider) + PF--creates-->PD + PD--check `HighestStaticFileTracker`-->PD + PD-->DC1{block_number > highest static_file block} + DC1 --> |true| PD1("DatabaseProvider::header(block_number)") + DC1 --> |false| ASP("StaticFileProvider::header(block_number)") + PD1 --> MDBX + ASP --find correct jar and creates--> JP("StaticFileJarProvider::header(block_number)") + JP --"creates"-->SC(StaticFileCursor) + SC --".get_one< HeaderMask< Header > >(number)"--->NJC("NippyJarCursor") + NJC--".row_by_number(row_index, mask)"-->NJ[NippyJar] + NJ--"&[u8]"-->NJC + NJC--"&[u8]"-->SC + SC--"Header"--> JP + JP--"Header"--> ASP +```
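The decision node in the query diagram reduces to one comparison against the highest block already frozen for a segment. A simplified, self-contained sketch of that dispatch (the types below are stand-ins for illustration, not the real reth providers):

```rust
/// Simplified stand-ins for the real providers; for illustration only.
struct Header;
struct StaticFiles { highest_header_block: Option<u64> }
struct Mdbx;

impl StaticFiles {
    fn header(&self, _number: u64) -> Option<Header> { Some(Header) }
}
impl Mdbx {
    fn header(&self, _number: u64) -> Option<Header> { Some(Header) }
}

struct Provider { static_files: StaticFiles, database: Mdbx }

impl Provider {
    /// Mirrors the diagram: blocks at or below the highest frozen block are
    /// served from static files; anything newer still lives in the database.
    fn header(&self, number: u64) -> Option<Header> {
        match self.static_files.highest_header_block {
            Some(highest) if number <= highest => self.static_files.header(number),
            _ => self.database.header(number),
        }
    }
}

fn main() {
    let provider = Provider {
        static_files: StaticFiles { highest_header_block: Some(499_999) },
        database: Mdbx,
    };
    assert!(provider.header(10).is_some());      // served from a static file
    assert!(provider.header(500_000).is_some()); // served from MDBX
}
```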
+ + +### Glossary +In descending order of abstraction hierarchy: + +[`StaticFileProducer`](../../crates/static_file/src/static_file_producer.rs#L20): A `reth` background service that **copies** data from the database to new static-file files when the block height reaches a certain threshold (e.g., `500_000th`). Upon completion, it dispatches a notification about the new highest static file block to the `HighestStaticFileTracker` channel. **It DOES NOT remove data from the database.** + +[`HighestStaticFileTracker`](../../crates/static_file/src/static_file_producer.rs#L22): A channel utilized by `StaticFileProducer` to announce the newest static_file block to all components with a listener: `Pruner` (to know which additional tables can be pruned) and `DatabaseProvider` (to know which data can be queried from the static files). + +[`StaticFileProvider`](../../crates/storage/provider/src/providers/static_file/manager.rs#L15): A provider similar to `DatabaseProvider`, **managing all existing static_file files** and selecting the optimal one (by range and segment type) to fulfill a request. **A single instance is shared across all components and should be instantiated only once within `ProviderFactory`**. An immutable reference is given every time `ProviderFactory` creates a new `DatabaseProvider`. + +[`StaticFileJarProvider`](../../crates/storage/provider/src/providers/static_file/jar.rs#L42): A provider similar to `DatabaseProvider` that provides access to a **single static_file file**. + +[`StaticFileCursor`](../../crates/storage/db/src/static_file/cursor.rs#L12): An elevated abstraction of `NippyJarCursor` for simplified access. It associates the bitmasks with type decoding. For instance, `cursor.get_two::<TransactionMask<Tx, Signature>>(tx_number)` would yield `Tx` and `Signature`, eliminating the need to manage masks or invoke a decoder/decompressor. + +[`StaticFileSegment`](../../crates/primitives/src/static_file/segment.rs#L10): Each static_file file only contains data of a specific segment, e.g., `Headers`, `Transactions`, or `Receipts`. + +[`NippyJarCursor`](../../crates/storage/nippy-jar/src/cursor.rs#L12): Accessor of data in a `NippyJar` file. It enables queries either by row number (e.g., block number 1) or by a predefined key not part of the file (e.g., transaction hashes). If a file has multiple columns (e.g., `Tx | TxSender | Signature`), and one wishes to access only one of the column values, this can be accomplished with bitmasks (e.g., for `TxSender`, the mask would be `0b010`). + +[`NippyJar`](../../crates/storage/nippy-jar/src/lib.rs#L57): A create-only file format. No data can be appended after creation. It supports multiple columns, compression (e.g., Zstd (with and without dictionaries), lz4, uncompressed) and inclusion filters (e.g., cuckoo filter: `is hash X part of this dataset`). Static files are organized by block ranges (e.g., `TransactionStaticFile_499_999.jar` contains one transaction per row for all transactions from block `0` to block `499_999`). For more details, check the struct documentation. diff --git a/crates/static-file/src/event.rs b/crates/static-file/src/event.rs new file mode 100644 index 000000000000..1a2ca31b207b --- /dev/null +++ b/crates/static-file/src/event.rs @@ -0,0 +1,19 @@ +use crate::StaticFileTargets; +use std::time::Duration; + +/// An event emitted by a [StaticFileProducer][crate::StaticFileProducer]. +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum StaticFileProducerEvent { + /// Emitted when static file producer started running.
+ Started { + /// Targets that will be moved to static files + targets: StaticFileTargets, + }, + /// Emitted when static file producer finished running. + Finished { + /// Targets that were moved to static files + targets: StaticFileTargets, + /// Time it took to run the static file producer + elapsed: Duration, + }, +} diff --git a/crates/snapshot/src/lib.rs b/crates/static-file/src/lib.rs similarity index 58% rename from crates/snapshot/src/lib.rs rename to crates/static-file/src/lib.rs index 1673aa0b8e51..2c6c11dfd4ff 100644 --- a/crates/snapshot/src/lib.rs +++ b/crates/static-file/src/lib.rs @@ -1,4 +1,4 @@ -//! Snapshotting implementation. +//! Static file producer implementation. #![doc( html_logo_url = "https://raw.githubusercontent.com/paradigmxyz/reth/main/assets/reth-docs.png", @@ -7,11 +7,11 @@ )] #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] -mod error; +mod event; pub mod segments; -mod snapshotter; +mod static_file_producer; -pub use error::SnapshotterError; -pub use snapshotter::{ - HighestSnapshotsTracker, SnapshotTargets, Snapshotter, SnapshotterResult, SnapshotterWithResult, +pub use event::StaticFileProducerEvent; +pub use static_file_producer::{ + StaticFileProducer, StaticFileProducerResult, StaticFileProducerWithResult, StaticFileTargets, }; diff --git a/crates/static-file/src/segments/headers.rs b/crates/static-file/src/segments/headers.rs new file mode 100644 index 000000000000..960b95897d5a --- /dev/null +++ b/crates/static-file/src/segments/headers.rs @@ -0,0 +1,128 @@ +use crate::segments::{dataset_for_compression, prepare_jar, Segment, SegmentHeader}; +use reth_db::{ + cursor::DbCursorRO, database::Database, static_file::create_static_file_T1_T2_T3, tables, + transaction::DbTx, RawKey, RawTable, +}; +use reth_interfaces::provider::ProviderResult; +use reth_primitives::{static_file::SegmentConfig, BlockNumber, StaticFileSegment}; +use reth_provider::{ + providers::{StaticFileProvider, StaticFileWriter}, + DatabaseProviderRO, +}; +use std::{ops::RangeInclusive, path::Path}; + +/// Static File segment responsible for [StaticFileSegment::Headers] part of data. 
+#[derive(Debug, Default)] +pub struct Headers; + +impl Segment for Headers { + fn segment(&self) -> StaticFileSegment { + StaticFileSegment::Headers + } + + fn copy_to_static_files( + &self, + provider: DatabaseProviderRO, + static_file_provider: StaticFileProvider, + block_range: RangeInclusive, + ) -> ProviderResult<()> { + let mut static_file_writer = + static_file_provider.get_writer(*block_range.start(), StaticFileSegment::Headers)?; + + let mut headers_cursor = provider.tx_ref().cursor_read::()?; + let headers_walker = headers_cursor.walk_range(block_range.clone())?; + + let mut header_td_cursor = + provider.tx_ref().cursor_read::()?; + let header_td_walker = header_td_cursor.walk_range(block_range.clone())?; + + let mut canonical_headers_cursor = + provider.tx_ref().cursor_read::()?; + let canonical_headers_walker = canonical_headers_cursor.walk_range(block_range)?; + + for ((header_entry, header_td_entry), canonical_header_entry) in + headers_walker.zip(header_td_walker).zip(canonical_headers_walker) + { + let (header_block, header) = header_entry?; + let (header_td_block, header_td) = header_td_entry?; + let (canonical_header_block, canonical_header) = canonical_header_entry?; + + debug_assert_eq!(header_block, header_td_block); + debug_assert_eq!(header_td_block, canonical_header_block); + + let _static_file_block = + static_file_writer.append_header(header, header_td.0, canonical_header)?; + debug_assert_eq!(_static_file_block, header_block); + } + + Ok(()) + } + + fn create_static_file_file( + &self, + provider: &DatabaseProviderRO, + directory: &Path, + config: SegmentConfig, + block_range: RangeInclusive, + ) -> ProviderResult<()> { + let range_len = block_range.clone().count(); + let jar = prepare_jar::( + provider, + directory, + StaticFileSegment::Headers, + config, + block_range.clone(), + range_len, + || { + Ok([ + dataset_for_compression::( + provider, + &block_range, + range_len, + )?, + dataset_for_compression::( + provider, + &block_range, + range_len, + )?, + dataset_for_compression::( + provider, + &block_range, + range_len, + )?, + ]) + }, + )?; + + // Generate list of hashes for filters & PHF + let mut cursor = provider.tx_ref().cursor_read::>()?; + let mut hashes = None; + if config.filters.has_filters() { + hashes = Some( + cursor + .walk(Some(RawKey::from(*block_range.start())))? + .take(range_len) + .map(|row| row.map(|(_key, value)| value.into_value()).map_err(|e| e.into())), + ); + } + + create_static_file_T1_T2_T3::< + tables::Headers, + tables::HeaderTerminalDifficulties, + tables::CanonicalHeaders, + BlockNumber, + SegmentHeader, + >( + provider.tx_ref(), + block_range, + None, + // We already prepared the dictionary beforehand + None::>>>, + hashes, + range_len, + jar, + )?; + + Ok(()) + } +} diff --git a/crates/static-file/src/segments/mod.rs b/crates/static-file/src/segments/mod.rs new file mode 100644 index 000000000000..7cad895aed7b --- /dev/null +++ b/crates/static-file/src/segments/mod.rs @@ -0,0 +1,116 @@ +//! StaticFile segment implementations and utilities. 
+ +mod transactions; +pub use transactions::Transactions; + +mod headers; +pub use headers::Headers; + +mod receipts; +pub use receipts::Receipts; + +use reth_db::{ + cursor::DbCursorRO, database::Database, table::Table, transaction::DbTx, RawKey, RawTable, +}; +use reth_interfaces::provider::ProviderResult; +use reth_nippy_jar::NippyJar; +use reth_primitives::{ + static_file::{ + find_fixed_range, Compression, Filters, InclusionFilter, PerfectHashingFunction, + SegmentConfig, SegmentHeader, + }, + BlockNumber, StaticFileSegment, +}; +use reth_provider::{providers::StaticFileProvider, DatabaseProviderRO, TransactionsProviderExt}; +use std::{ops::RangeInclusive, path::Path}; + +pub(crate) type Rows = [Vec>; COLUMNS]; + +/// A segment represents moving some portion of the data to static files. +pub trait Segment: Send + Sync { + /// Returns the [`StaticFileSegment`]. + fn segment(&self) -> StaticFileSegment; + + /// Move data to static files for the provided block range. [StaticFileProvider] will handle the + /// management of and writing to files. + fn copy_to_static_files( + &self, + provider: DatabaseProviderRO, + static_file_provider: StaticFileProvider, + block_range: RangeInclusive, + ) -> ProviderResult<()>; + + /// Create a static file of data for the provided block range. The `directory` parameter + /// determines the static file's save location. + fn create_static_file_file( + &self, + provider: &DatabaseProviderRO, + directory: &Path, + config: SegmentConfig, + block_range: RangeInclusive, + ) -> ProviderResult<()>; +} + +/// Returns a [`NippyJar`] according to the desired configuration. The `directory` parameter +/// determines the static file's save location. +pub(crate) fn prepare_jar( + provider: &DatabaseProviderRO, + directory: impl AsRef, + segment: StaticFileSegment, + segment_config: SegmentConfig, + block_range: RangeInclusive, + total_rows: usize, + prepare_compression: impl Fn() -> ProviderResult>, +) -> ProviderResult> { + let tx_range = match segment { + StaticFileSegment::Headers => None, + StaticFileSegment::Receipts | StaticFileSegment::Transactions => { + Some(provider.transaction_range_by_block_range(block_range.clone())?.into()) + } + }; + + let mut nippy_jar = NippyJar::new( + COLUMNS, + &directory.as_ref().join(segment.filename(&find_fixed_range(*block_range.end())).as_str()), + SegmentHeader::new(block_range.clone().into(), Some(block_range.into()), tx_range, segment), + ); + + nippy_jar = match segment_config.compression { + Compression::Lz4 => nippy_jar.with_lz4(), + Compression::Zstd => nippy_jar.with_zstd(false, 0), + Compression::ZstdWithDictionary => { + let dataset = prepare_compression()?; + + nippy_jar = nippy_jar.with_zstd(true, 5_000_000); + nippy_jar.prepare_compression(dataset.to_vec())?; + nippy_jar + } + Compression::Uncompressed => nippy_jar, + }; + + if let Filters::WithFilters(inclusion_filter, phf) = segment_config.filters { + nippy_jar = match inclusion_filter { + InclusionFilter::Cuckoo => nippy_jar.with_cuckoo_filter(total_rows), + }; + nippy_jar = match phf { + PerfectHashingFunction::Fmph => nippy_jar.with_fmph(), + PerfectHashingFunction::GoFmph => nippy_jar.with_gofmph(), + }; + } + + Ok(nippy_jar) +} + +/// Generates the dataset to train a zstd dictionary with the most recent rows (at most 1000). 
+pub(crate) fn dataset_for_compression>( + provider: &DatabaseProviderRO, + range: &RangeInclusive, + range_len: usize, +) -> ProviderResult>> { + let mut cursor = provider.tx_ref().cursor_read::>()?; + Ok(cursor + .walk_back(Some(RawKey::from(*range.end())))? + .take(range_len.min(1000)) + .map(|row| row.map(|(_key, value)| value.into_value()).expect("should exist")) + .collect::>()) +} diff --git a/crates/static-file/src/segments/receipts.rs b/crates/static-file/src/segments/receipts.rs new file mode 100644 index 000000000000..5934edf8768e --- /dev/null +++ b/crates/static-file/src/segments/receipts.rs @@ -0,0 +1,107 @@ +use crate::segments::{dataset_for_compression, prepare_jar, Segment}; +use reth_db::{ + cursor::DbCursorRO, database::Database, static_file::create_static_file_T1, tables, + transaction::DbTx, +}; +use reth_interfaces::provider::{ProviderError, ProviderResult}; +use reth_primitives::{ + static_file::{SegmentConfig, SegmentHeader}, + BlockNumber, StaticFileSegment, TxNumber, +}; +use reth_provider::{ + providers::{StaticFileProvider, StaticFileWriter}, + BlockReader, DatabaseProviderRO, TransactionsProviderExt, +}; +use std::{ops::RangeInclusive, path::Path}; + +/// Static File segment responsible for [StaticFileSegment::Receipts] part of data. +#[derive(Debug, Default)] +pub struct Receipts; + +impl Segment for Receipts { + fn segment(&self) -> StaticFileSegment { + StaticFileSegment::Receipts + } + + fn copy_to_static_files( + &self, + provider: DatabaseProviderRO, + static_file_provider: StaticFileProvider, + block_range: RangeInclusive, + ) -> ProviderResult<()> { + let mut static_file_writer = + static_file_provider.get_writer(*block_range.start(), StaticFileSegment::Receipts)?; + + for block in block_range { + let _static_file_block = + static_file_writer.increment_block(StaticFileSegment::Receipts)?; + debug_assert_eq!(_static_file_block, block); + + let block_body_indices = provider + .block_body_indices(block)? + .ok_or(ProviderError::BlockBodyIndicesNotFound(block))?; + + let mut receipts_cursor = provider.tx_ref().cursor_read::()?; + let receipts_walker = receipts_cursor.walk_range(block_body_indices.tx_num_range())?; + + for entry in receipts_walker { + let (tx_number, receipt) = entry?; + + static_file_writer.append_receipt(tx_number, receipt)?; + } + } + + Ok(()) + } + + fn create_static_file_file( + &self, + provider: &DatabaseProviderRO, + directory: &Path, + config: SegmentConfig, + block_range: RangeInclusive, + ) -> ProviderResult<()> { + let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; + let tx_range_len = tx_range.clone().count(); + + let jar = prepare_jar::( + provider, + directory, + StaticFileSegment::Receipts, + config, + block_range, + tx_range_len, + || { + Ok([dataset_for_compression::( + provider, + &tx_range, + tx_range_len, + )?]) + }, + )?; + + // Generate list of hashes for filters & PHF + let mut hashes = None; + if config.filters.has_filters() { + hashes = Some( + provider + .transaction_hashes_by_range(*tx_range.start()..(*tx_range.end() + 1))? 
+ .into_iter() + .map(|(tx, _)| Ok(tx)), + ); + } + + create_static_file_T1::( + provider.tx_ref(), + tx_range, + None, + // We already prepared the dictionary beforehand + None::>>>, + hashes, + tx_range_len, + jar, + )?; + + Ok(()) + } +} diff --git a/crates/static-file/src/segments/transactions.rs b/crates/static-file/src/segments/transactions.rs new file mode 100644 index 000000000000..b8a6928b3201 --- /dev/null +++ b/crates/static-file/src/segments/transactions.rs @@ -0,0 +1,111 @@ +use crate::segments::{dataset_for_compression, prepare_jar, Segment}; +use reth_db::{ + cursor::DbCursorRO, database::Database, static_file::create_static_file_T1, tables, + transaction::DbTx, +}; +use reth_interfaces::provider::{ProviderError, ProviderResult}; +use reth_primitives::{ + static_file::{SegmentConfig, SegmentHeader}, + BlockNumber, StaticFileSegment, TxNumber, +}; +use reth_provider::{ + providers::{StaticFileProvider, StaticFileWriter}, + BlockReader, DatabaseProviderRO, TransactionsProviderExt, +}; +use std::{ops::RangeInclusive, path::Path}; + +/// Static File segment responsible for [StaticFileSegment::Transactions] part of data. +#[derive(Debug, Default)] +pub struct Transactions; + +impl Segment for Transactions { + fn segment(&self) -> StaticFileSegment { + StaticFileSegment::Transactions + } + + /// Write transactions from database table [tables::Transactions] to static files with segment + /// [StaticFileSegment::Transactions] for the provided block range. + fn copy_to_static_files( + &self, + provider: DatabaseProviderRO, + static_file_provider: StaticFileProvider, + block_range: RangeInclusive, + ) -> ProviderResult<()> { + let mut static_file_writer = static_file_provider + .get_writer(*block_range.start(), StaticFileSegment::Transactions)?; + + for block in block_range { + let _static_file_block = + static_file_writer.increment_block(StaticFileSegment::Transactions)?; + debug_assert_eq!(_static_file_block, block); + + let block_body_indices = provider + .block_body_indices(block)? + .ok_or(ProviderError::BlockBodyIndicesNotFound(block))?; + + let mut transactions_cursor = + provider.tx_ref().cursor_read::()?; + let transactions_walker = + transactions_cursor.walk_range(block_body_indices.tx_num_range())?; + + for entry in transactions_walker { + let (tx_number, transaction) = entry?; + + static_file_writer.append_transaction(tx_number, transaction)?; + } + } + + Ok(()) + } + + fn create_static_file_file( + &self, + provider: &DatabaseProviderRO, + directory: &Path, + config: SegmentConfig, + block_range: RangeInclusive, + ) -> ProviderResult<()> { + let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; + let tx_range_len = tx_range.clone().count(); + + let jar = prepare_jar::( + provider, + directory, + StaticFileSegment::Transactions, + config, + block_range, + tx_range_len, + || { + Ok([dataset_for_compression::( + provider, + &tx_range, + tx_range_len, + )?]) + }, + )?; + + // Generate list of hashes for filters & PHF + let mut hashes = None; + if config.filters.has_filters() { + hashes = Some( + provider + .transaction_hashes_by_range(*tx_range.start()..(*tx_range.end() + 1))? 
+ .into_iter() + .map(|(tx, _)| Ok(tx)), + ); + } + + create_static_file_T1::( + provider.tx_ref(), + tx_range, + None, + // We already prepared the dictionary beforehand + None::>>>, + hashes, + tx_range_len, + jar, + )?; + + Ok(()) + } +} diff --git a/crates/static-file/src/static_file_producer.rs b/crates/static-file/src/static_file_producer.rs new file mode 100644 index 000000000000..52b115e9fee8 --- /dev/null +++ b/crates/static-file/src/static_file_producer.rs @@ -0,0 +1,327 @@ +//! Support for producing static files. + +use crate::{segments, segments::Segment, StaticFileProducerEvent}; +use rayon::prelude::*; +use reth_db::database::Database; +use reth_interfaces::RethResult; +use reth_primitives::{static_file::HighestStaticFiles, BlockNumber, PruneModes}; +use reth_provider::{ + providers::{StaticFileProvider, StaticFileWriter}, + ProviderFactory, +}; +use reth_tokio_util::EventListeners; +use std::{ops::RangeInclusive, time::Instant}; +use tokio_stream::wrappers::UnboundedReceiverStream; +use tracing::{debug, trace}; + +/// Result of [StaticFileProducer::run] execution. +pub type StaticFileProducerResult = RethResult; + +/// The [StaticFileProducer] instance itself with the result of [StaticFileProducer::run] +pub type StaticFileProducerWithResult = (StaticFileProducer, StaticFileProducerResult); + +/// Static File producer routine. See [StaticFileProducer::run] for more detailed description. +#[derive(Debug, Clone)] +pub struct StaticFileProducer { + /// Provider factory + provider_factory: ProviderFactory, + /// Static File provider + static_file_provider: StaticFileProvider, + /// Pruning configuration for every part of the data that can be pruned. Set by user, and + /// needed in [StaticFileProducer] to prevent attempting to move prunable data to static files. + /// See [StaticFileProducer::get_static_file_targets]. + prune_modes: PruneModes, + listeners: EventListeners, +} + +/// Static File targets, per data part, measured in [`BlockNumber`]. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct StaticFileTargets { + headers: Option>, + receipts: Option>, + transactions: Option>, +} + +impl StaticFileTargets { + /// Returns `true` if any of the targets are [Some]. + pub fn any(&self) -> bool { + self.headers.is_some() || self.receipts.is_some() || self.transactions.is_some() + } + + // Returns `true` if all targets are either [`None`] or has beginning of the range equal to the + // highest static_file. + fn is_contiguous_to_highest_static_files(&self, static_files: HighestStaticFiles) -> bool { + [ + (self.headers.as_ref(), static_files.headers), + (self.receipts.as_ref(), static_files.receipts), + (self.transactions.as_ref(), static_files.transactions), + ] + .iter() + .all(|(target_block_range, highest_static_fileted_block)| { + target_block_range.map_or(true, |target_block_range| { + *target_block_range.start() == + highest_static_fileted_block.map_or(0, |highest_static_fileted_block| { + highest_static_fileted_block + 1 + }) + }) + }) + } +} + +impl StaticFileProducer { + /// Creates a new [StaticFileProducer]. + pub fn new( + provider_factory: ProviderFactory, + static_file_provider: StaticFileProvider, + prune_modes: PruneModes, + ) -> Self { + Self { provider_factory, static_file_provider, prune_modes, listeners: Default::default() } + } + + /// Listen for events on the static_file_producer. + pub fn events(&mut self) -> UnboundedReceiverStream { + self.listeners.new_listener() + } + + /// Run the static_file_producer. 
+ /// + /// For each [Some] target in [StaticFileTargets], initializes a corresponding [Segment] and + /// runs it with the provided block range using [StaticFileProvider] and a read-only + /// database transaction from [ProviderFactory]. All segments are run in parallel. + /// + /// NOTE: it doesn't delete the data from database, and the actual deleting (aka pruning) logic + /// lives in the `prune` crate. + pub fn run(&mut self, targets: StaticFileTargets) -> StaticFileProducerResult { + debug_assert!(targets.is_contiguous_to_highest_static_files( + self.static_file_provider.get_highest_static_files() + )); + + self.listeners.notify(StaticFileProducerEvent::Started { targets: targets.clone() }); + + debug!(target: "static_file", ?targets, "StaticFileProducer started"); + let start = Instant::now(); + + let mut segments = Vec::<(Box>, RangeInclusive)>::new(); + + if let Some(block_range) = targets.transactions.clone() { + segments.push((Box::new(segments::Transactions), block_range)); + } + if let Some(block_range) = targets.headers.clone() { + segments.push((Box::new(segments::Headers), block_range)); + } + if let Some(block_range) = targets.receipts.clone() { + segments.push((Box::new(segments::Receipts), block_range)); + } + + segments.par_iter().try_for_each(|(segment, block_range)| -> RethResult<()> { + debug!(target: "static_file", segment = %segment.segment(), ?block_range, "StaticFileProducer segment"); + let start = Instant::now(); + + // Create a new database transaction on every segment to prevent long-lived read-only + // transactions + let provider = self.provider_factory.provider()?.disable_long_read_transaction_safety(); + segment.copy_to_static_files(provider, self.static_file_provider.clone(), block_range.clone())?; + + let elapsed = start.elapsed(); // TODO(alexey): track in metrics + debug!(target: "static_file", segment = %segment.segment(), ?block_range, ?elapsed, "Finished StaticFileProducer segment"); + + Ok(()) + })?; + + self.static_file_provider.commit()?; + for (segment, block_range) in segments { + self.static_file_provider.update_index(segment.segment(), Some(*block_range.end()))?; + } + + let elapsed = start.elapsed(); // TODO(alexey): track in metrics + debug!(target: "static_file", ?targets, ?elapsed, "StaticFileProducer finished"); + + self.listeners + .notify(StaticFileProducerEvent::Finished { targets: targets.clone(), elapsed }); + + Ok(targets) + } + + /// Returns a static file targets at the provided finalized block numbers per segment. + /// The target is determined by the check against highest static_files using + /// [StaticFileProvider::get_highest_static_files]. 
+ pub fn get_static_file_targets( + &self, + finalized_block_numbers: HighestStaticFiles, + ) -> RethResult { + let highest_static_files = self.static_file_provider.get_highest_static_files(); + + let targets = StaticFileTargets { + headers: finalized_block_numbers.headers.and_then(|finalized_block_number| { + self.get_static_file_target(highest_static_files.headers, finalized_block_number) + }), + // StaticFile receipts only if they're not pruned according to the user configuration + receipts: if self.prune_modes.receipts.is_none() && + self.prune_modes.receipts_log_filter.is_empty() + { + finalized_block_numbers.receipts.and_then(|finalized_block_number| { + self.get_static_file_target( + highest_static_files.receipts, + finalized_block_number, + ) + }) + } else { + None + }, + transactions: finalized_block_numbers.transactions.and_then(|finalized_block_number| { + self.get_static_file_target( + highest_static_files.transactions, + finalized_block_number, + ) + }), + }; + + trace!( + target: "static_file", + ?finalized_block_numbers, + ?highest_static_files, + ?targets, + any = %targets.any(), + "StaticFile targets" + ); + + Ok(targets) + } + + fn get_static_file_target( + &self, + highest_static_file: Option, + finalized_block_number: BlockNumber, + ) -> Option> { + let range = highest_static_file.map_or(0, |block| block + 1)..=finalized_block_number; + (!range.is_empty()).then_some(range) + } +} + +#[cfg(test)] +mod tests { + use crate::{static_file_producer::StaticFileTargets, StaticFileProducer}; + use assert_matches::assert_matches; + use reth_db::{database::Database, transaction::DbTx}; + use reth_interfaces::{ + provider::ProviderError, + test_utils::{ + generators, + generators::{random_block_range, random_receipt}, + }, + RethError, + }; + use reth_primitives::{ + static_file::HighestStaticFiles, PruneModes, StaticFileSegment, B256, U256, + }; + use reth_provider::providers::StaticFileWriter; + use reth_stages::test_utils::{StorageKind, TestStageDB}; + + #[test] + fn run() { + let mut rng = generators::rng(); + + let db = TestStageDB::default(); + + let blocks = random_block_range(&mut rng, 0..=3, B256::ZERO, 2..3); + db.insert_blocks(blocks.iter(), StorageKind::Database(None)).expect("insert blocks"); + // Unwind headers from static_files and manually insert them into the database, so we're + // able to check that static_file_producer works + db.factory + .static_file_provider() + .latest_writer(StaticFileSegment::Headers) + .expect("get static file writer for headers") + .prune_headers(blocks.len() as u64) + .expect("prune headers"); + let tx = db.factory.db_ref().tx_mut().expect("init tx"); + blocks.iter().for_each(|block| { + TestStageDB::insert_header(None, &tx, &block.header, U256::ZERO) + .expect("insert block header"); + }); + tx.commit().expect("commit tx"); + + let mut receipts = Vec::new(); + for block in &blocks { + for transaction in &block.body { + receipts + .push((receipts.len() as u64, random_receipt(&mut rng, transaction, Some(0)))); + } + } + db.insert_receipts(receipts).expect("insert receipts"); + + let provider_factory = db.factory; + let static_file_provider = provider_factory.static_file_provider(); + + let mut static_file_producer = StaticFileProducer::new( + provider_factory, + static_file_provider.clone(), + PruneModes::default(), + ); + + let targets = static_file_producer + .get_static_file_targets(HighestStaticFiles { + headers: Some(1), + receipts: Some(1), + transactions: Some(1), + }) + .expect("get static file targets"); + assert_eq!( + targets, 
+ StaticFileTargets { + headers: Some(0..=1), + receipts: Some(0..=1), + transactions: Some(0..=1) + } + ); + assert_matches!(static_file_producer.run(targets), Ok(_)); + assert_eq!( + static_file_provider.get_highest_static_files(), + HighestStaticFiles { headers: Some(1), receipts: Some(1), transactions: Some(1) } + ); + + let targets = static_file_producer + .get_static_file_targets(HighestStaticFiles { + headers: Some(3), + receipts: Some(3), + transactions: Some(3), + }) + .expect("get static file targets"); + assert_eq!( + targets, + StaticFileTargets { + headers: Some(2..=3), + receipts: Some(2..=3), + transactions: Some(2..=3) + } + ); + assert_matches!(static_file_producer.run(targets), Ok(_)); + assert_eq!( + static_file_provider.get_highest_static_files(), + HighestStaticFiles { headers: Some(3), receipts: Some(3), transactions: Some(3) } + ); + + let targets = static_file_producer + .get_static_file_targets(HighestStaticFiles { + headers: Some(4), + receipts: Some(4), + transactions: Some(4), + }) + .expect("get static file targets"); + assert_eq!( + targets, + StaticFileTargets { + headers: Some(4..=4), + receipts: Some(4..=4), + transactions: Some(4..=4) + } + ); + assert_matches!( + static_file_producer.run(targets), + Err(RethError::Provider(ProviderError::BlockBodyIndicesNotFound(4))) + ); + assert_eq!( + static_file_provider.get_highest_static_files(), + HighestStaticFiles { headers: Some(3), receipts: Some(3), transactions: Some(3) } + ); + } +} diff --git a/crates/storage/codecs/derive/src/compact/generator.rs b/crates/storage/codecs/derive/src/compact/generator.rs index 370d74eec2a1..8cd9070bb4b2 100644 --- a/crates/storage/codecs/derive/src/compact/generator.rs +++ b/crates/storage/codecs/derive/src/compact/generator.rs @@ -52,7 +52,7 @@ pub fn generate_from_to(ident: &Ident, fields: &FieldList, is_zstd: bool) -> Tok /// Generates code to implement the `Compact` trait method `to_compact`. fn generate_from_compact(fields: &FieldList, ident: &Ident, is_zstd: bool) -> TokenStream2 { let mut lines = vec![]; - let mut known_types = vec!["B256", "Address", "Bloom", "Vec", "TxHash"]; + let mut known_types = vec!["B256", "Address", "Bloom", "Vec", "TxHash", "BlockHash"]; // Only types without `Bytes` should be added here. It's currently manually added, since // it's hard to figure out with derive_macro which types have Bytes fields. 
diff --git a/crates/storage/codecs/derive/src/compact/mod.rs b/crates/storage/codecs/derive/src/compact/mod.rs index 7e1ed9b42d43..f7e2082c7631 100644 --- a/crates/storage/codecs/derive/src/compact/mod.rs +++ b/crates/storage/codecs/derive/src/compact/mod.rs @@ -143,7 +143,7 @@ fn should_use_alt_impl(ftype: &String, segment: &syn::PathSegment) -> bool { if let (Some(path), 1) = (arg_path.path.segments.first(), arg_path.path.segments.len()) { - if ["B256", "Address", "Address", "Bloom", "TxHash"] + if ["B256", "Address", "Address", "Bloom", "TxHash", "BlockHash"] .contains(&path.ident.to_string().as_str()) { return true @@ -164,11 +164,6 @@ pub fn get_bit_size(ftype: &str) -> u8 { "u64" | "BlockNumber" | "TxNumber" | "ChainId" | "NumTransactions" => 4, "u128" => 5, "U256" => 6, - #[cfg(not(feature = "optimism"))] - "TxValue" => 5, // u128 for ethereum chains assuming high order bits are not used - #[cfg(feature = "optimism")] - // for fuzz/prop testing and chains that may require full 256 bits - "TxValue" => 6, _ => 0, } } diff --git a/crates/storage/db/benches/criterion.rs b/crates/storage/db/benches/criterion.rs index 39f9dc164f48..54dca69b2113 100644 --- a/crates/storage/db/benches/criterion.rs +++ b/crates/storage/db/benches/criterion.rs @@ -23,12 +23,12 @@ pub fn db(c: &mut Criterion) { group.warm_up_time(std::time::Duration::from_millis(200)); measure_table_db::(&mut group); - measure_table_db::(&mut group); + measure_table_db::(&mut group); measure_table_db::(&mut group); measure_table_db::(&mut group); measure_table_db::(&mut group); measure_table_db::(&mut group); - measure_table_db::(&mut group); + measure_table_db::(&mut group); measure_table_db::(&mut group); measure_dupsort_db::(&mut group); measure_table_db::(&mut group); @@ -40,12 +40,12 @@ pub fn serialization(c: &mut Criterion) { group.warm_up_time(std::time::Duration::from_millis(200)); measure_table_serialization::(&mut group); - measure_table_serialization::(&mut group); + measure_table_serialization::(&mut group); measure_table_serialization::(&mut group); measure_table_serialization::(&mut group); measure_table_serialization::(&mut group); measure_table_serialization::(&mut group); - measure_table_serialization::(&mut group); + measure_table_serialization::(&mut group); measure_table_serialization::(&mut group); measure_table_serialization::(&mut group); measure_table_serialization::(&mut group); diff --git a/crates/storage/db/benches/hash_keys.rs b/crates/storage/db/benches/hash_keys.rs index 1266b8c81992..5376bf5040cc 100644 --- a/crates/storage/db/benches/hash_keys.rs +++ b/crates/storage/db/benches/hash_keys.rs @@ -9,7 +9,7 @@ use proptest::{ strategy::{Strategy, ValueTree}, test_runner::TestRunner, }; -use reth_db::{cursor::DbCursorRW, TxHashNumber}; +use reth_db::{cursor::DbCursorRW, TransactionHashNumbers}; use std::collections::HashSet; criterion_group! { @@ -34,7 +34,7 @@ pub fn hash_keys(c: &mut Criterion) { group.sample_size(10); for size in [10_000, 100_000, 1_000_000] { - measure_table_insertion::(&mut group, size); + measure_table_insertion::(&mut group, size); } } diff --git a/crates/storage/db/benches/iai.rs b/crates/storage/db/benches/iai.rs index cd153774361b..9079933511c1 100644 --- a/crates/storage/db/benches/iai.rs +++ b/crates/storage/db/benches/iai.rs @@ -79,12 +79,12 @@ macro_rules! 
impl_iai { impl_iai!( CanonicalHeaders, - HeaderTD, + HeaderTerminalDifficulties, HeaderNumbers, Headers, BlockBodyIndices, BlockOmmers, - TxHashNumber, + TransactionHashNumbers, Transactions, PlainStorageState, PlainAccountState diff --git a/crates/storage/db/src/implementation/mdbx/mod.rs b/crates/storage/db/src/implementation/mdbx/mod.rs index 9e45049b7757..5a3e01a3e55b 100644 --- a/crates/storage/db/src/implementation/mdbx/mod.rs +++ b/crates/storage/db/src/implementation/mdbx/mod.rs @@ -232,7 +232,10 @@ impl DatabaseEnv { } }; - inner_env.set_max_dbs(Tables::ALL.len()); + // Note: We set max dbs to 256 here to allow for custom tables. This needs to be set on + // environment creation. + debug_assert!(Tables::ALL.len() <= 256, "number of tables exceed max dbs"); + inner_env.set_max_dbs(256); inner_env.set_geometry(Geometry { // Maximum database size of 4 terabytes size: Some(0..(4 * TERABYTE)), @@ -385,10 +388,12 @@ mod tests { abstraction::table::{Encode, Table}, cursor::{DbCursorRO, DbCursorRW, DbDupCursorRO, DbDupCursorRW, ReverseWalker, Walker}, models::{AccountBeforeTx, ShardedKey}, - tables::{AccountHistory, CanonicalHeaders, Headers, PlainAccountState, PlainStorageState}, + tables::{ + AccountsHistory, CanonicalHeaders, Headers, PlainAccountState, PlainStorageState, + }, test_utils::*, transaction::{DbTx, DbTxMut}, - AccountChangeSet, + AccountChangeSets, }; use reth_interfaces::db::{DatabaseWriteError, DatabaseWriteOperation}; use reth_libmdbx::Error; @@ -544,24 +549,24 @@ mod tests { let address2 = Address::with_last_byte(2); let tx = db.tx_mut().expect(ERROR_INIT_TX); - tx.put::(0, AccountBeforeTx { address: address0, info: None }) + tx.put::(0, AccountBeforeTx { address: address0, info: None }) .expect(ERROR_PUT); - tx.put::(0, AccountBeforeTx { address: address1, info: None }) + tx.put::(0, AccountBeforeTx { address: address1, info: None }) .expect(ERROR_PUT); - tx.put::(0, AccountBeforeTx { address: address2, info: None }) + tx.put::(0, AccountBeforeTx { address: address2, info: None }) .expect(ERROR_PUT); - tx.put::(1, AccountBeforeTx { address: address0, info: None }) + tx.put::(1, AccountBeforeTx { address: address0, info: None }) .expect(ERROR_PUT); - tx.put::(1, AccountBeforeTx { address: address1, info: None }) + tx.put::(1, AccountBeforeTx { address: address1, info: None }) .expect(ERROR_PUT); - tx.put::(1, AccountBeforeTx { address: address2, info: None }) + tx.put::(1, AccountBeforeTx { address: address2, info: None }) .expect(ERROR_PUT); - tx.put::(2, AccountBeforeTx { address: address0, info: None }) // <- should not be returned by the walker + tx.put::(2, AccountBeforeTx { address: address0, info: None }) // <- should not be returned by the walker .expect(ERROR_PUT); tx.commit().expect(ERROR_COMMIT); let tx = db.tx().expect(ERROR_INIT_TX); - let mut cursor = tx.cursor_read::().unwrap(); + let mut cursor = tx.cursor_read::().unwrap(); let entries = cursor.walk_range(..).unwrap().collect::, _>>().unwrap(); assert_eq!(entries.len(), 7); @@ -958,7 +963,7 @@ mod tests { let transition_id = 2; let tx = db.tx_mut().expect(ERROR_INIT_TX); - let mut cursor = tx.cursor_write::().unwrap(); + let mut cursor = tx.cursor_write::().unwrap(); vec![0, 1, 3, 4, 5] .into_iter() .try_for_each(|val| { @@ -973,7 +978,7 @@ mod tests { // APPEND DUP & APPEND let subkey_to_append = 2; let tx = db.tx_mut().expect(ERROR_INIT_TX); - let mut cursor = tx.cursor_write::().unwrap(); + let mut cursor = tx.cursor_write::().unwrap(); assert_eq!( cursor.append_dup( transition_id, @@ -982,7 +987,7 
@@ mod tests { Err(DatabaseWriteError { info: Error::KeyMismatch.into(), operation: DatabaseWriteOperation::CursorAppendDup, - table_name: AccountChangeSet::NAME, + table_name: AccountChangeSets::NAME, key: transition_id.encode().into(), } .into()) @@ -995,7 +1000,7 @@ mod tests { Err(DatabaseWriteError { info: Error::KeyMismatch.into(), operation: DatabaseWriteOperation::CursorAppend, - table_name: AccountChangeSet::NAME, + table_name: AccountChangeSets::NAME, key: (transition_id - 1).encode().into(), } .into()) @@ -1184,13 +1189,14 @@ mod tests { let key = ShardedKey::new(real_key, i * 100); let list: IntegerList = vec![i * 100u64].into(); - db.update(|tx| tx.put::(key.clone(), list.clone()).expect("")).unwrap(); + db.update(|tx| tx.put::(key.clone(), list.clone()).expect("")) + .unwrap(); } // Seek value with non existing key. { let tx = db.tx().expect(ERROR_INIT_TX); - let mut cursor = tx.cursor_read::().unwrap(); + let mut cursor = tx.cursor_read::().unwrap(); // It will seek the one greater or equal to the query. Since we have `Address | 100`, // `Address | 200` in the database and we're querying `Address | 150` it will return us @@ -1208,7 +1214,7 @@ mod tests { // Seek greatest index { let tx = db.tx().expect(ERROR_INIT_TX); - let mut cursor = tx.cursor_read::().unwrap(); + let mut cursor = tx.cursor_read::().unwrap(); // It will seek the MAX value of transition index and try to use prev to get first // biggers. diff --git a/crates/storage/db/src/lib.rs b/crates/storage/db/src/lib.rs index 2bbd9edfff75..ea260eaebf89 100644 --- a/crates/storage/db/src/lib.rs +++ b/crates/storage/db/src/lib.rs @@ -67,7 +67,7 @@ pub mod abstraction; mod implementation; mod metrics; -pub mod snapshot; +pub mod static_file; pub mod tables; mod utils; pub mod version; @@ -98,7 +98,7 @@ pub fn init_db>(path: P, args: DatabaseArguments) -> eyre::Result let rpath = path.as_ref(); if is_database_empty(rpath) { - std::fs::create_dir_all(rpath) + reth_primitives::fs::create_dir_all(rpath) .wrap_err_with(|| format!("Could not create database directory {}", rpath.display()))?; create_db_version_file(rpath)?; } else { @@ -163,6 +163,8 @@ pub mod test_utils { pub const ERROR_DB_OPEN: &str = "Not able to open the database file."; /// Error during database creation pub const ERROR_DB_CREATION: &str = "Not able to create the database file."; + /// Error during database creation + pub const ERROR_STATIC_FILES_CREATION: &str = "Not able to create the static file path."; /// Error during table creation pub const ERROR_TABLE_CREATION: &str = "Not able to create tables in the database."; /// Error during tempdir creation @@ -225,6 +227,15 @@ pub mod test_utils { } } + /// Create static_files path for testing + pub fn create_test_static_files_dir() -> PathBuf { + let path = tempdir_path(); + let emsg = format!("{}: {:?}", ERROR_STATIC_FILES_CREATION, path); + + reth_primitives::fs::create_dir_all(path.clone()).expect(&emsg); + path + } + /// Get a temporary directory path to use for the database pub fn tempdir_path() -> PathBuf { let builder = tempfile::Builder::new().prefix("reth-test-").rand_bytes(8).tempdir(); diff --git a/crates/storage/db/src/snapshot/masks.rs b/crates/storage/db/src/snapshot/masks.rs deleted file mode 100644 index aecf151ebd84..000000000000 --- a/crates/storage/db/src/snapshot/masks.rs +++ /dev/null @@ -1,28 +0,0 @@ -use super::{ReceiptMask, TransactionMask}; -use crate::{ - add_snapshot_mask, - snapshot::mask::{ColumnSelectorOne, ColumnSelectorTwo, HeaderMask}, - table::Table, - CanonicalHeaders, 
HeaderTD, Receipts, Transactions, -}; -use reth_primitives::{BlockHash, Header}; - -// HEADER MASKS - -add_snapshot_mask!(HeaderMask, Header, 0b001); -add_snapshot_mask!(HeaderMask, ::Value, 0b010); -add_snapshot_mask!(HeaderMask, BlockHash, 0b100); - -add_snapshot_mask!(HeaderMask, Header, BlockHash, 0b101); -add_snapshot_mask!( - HeaderMask, - ::Value, - ::Value, - 0b110 -); - -// RECEIPT MASKS -add_snapshot_mask!(ReceiptMask, ::Value, 0b1); - -// TRANSACTION MASKS -add_snapshot_mask!(TransactionMask, ::Value, 0b1); diff --git a/crates/storage/db/src/snapshot/mod.rs b/crates/storage/db/src/snapshot/mod.rs deleted file mode 100644 index 0856466d23ac..000000000000 --- a/crates/storage/db/src/snapshot/mod.rs +++ /dev/null @@ -1,76 +0,0 @@ -//! reth's snapshot database table import and access - -mod generation; -use std::{ - collections::{hash_map::Entry, HashMap}, - ops::RangeInclusive, - path::Path, -}; - -pub use generation::*; - -mod cursor; -pub use cursor::SnapshotCursor; - -mod mask; -pub use mask::*; -use reth_nippy_jar::{NippyJar, NippyJarError}; -use reth_primitives::{snapshot::SegmentHeader, BlockNumber, SnapshotSegment, TxNumber}; - -mod masks; - -/// Alias type for a map of [`SnapshotSegment`] and sorted lists of existing snapshot ranges. -type SortedSnapshots = - HashMap, RangeInclusive)>>; - -/// Given the snapshots directory path, it returns a list over the existing snapshots organized by -/// [`SnapshotSegment`]. Each segment has a sorted list of block ranges and transaction ranges. -pub fn iter_snapshots(path: impl AsRef) -> Result { - let mut static_files = SortedSnapshots::default(); - let entries = reth_primitives::fs::read_dir(path.as_ref()) - .map_err(|err| NippyJarError::Custom(err.to_string()))? - .filter_map(Result::ok) - .collect::>(); - - for entry in entries { - if entry.metadata().map_or(false, |metadata| metadata.is_file()) { - if let Some((segment, block_range, tx_range)) = - SnapshotSegment::parse_filename(&entry.file_name()) - { - let ranges = (block_range, tx_range); - match static_files.entry(segment) { - Entry::Occupied(mut entry) => { - entry.get_mut().push(ranges); - } - Entry::Vacant(entry) => { - entry.insert(vec![ranges]); - } - } - } - } - } - - for (segment, range_list) in static_files.iter_mut() { - // Sort by block end range. - range_list.sort_by(|a, b| a.0.end().cmp(b.0.end())); - - if let Some((block_range, tx_range)) = range_list.pop() { - // The highest height static file filename might not be indicative of its actual - // block_range, so we need to read its actual configuration. 
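The comment above is the reason both the removed module and its `static_file` replacement later in this diff load the jar's sidecar configuration instead of trusting the parsed filename. A minimal sketch of that idea, assuming the `SegmentHeader` accessors used elsewhere in this diff (`load`, `user_header`, `block_range`); the helper name itself is hypothetical:

```rust
use reth_nippy_jar::{NippyJar, NippyJarError};
use reth_primitives::static_file::{SegmentHeader, SegmentRangeInclusive};
use std::path::Path;

/// Hypothetical helper: the authoritative block range always comes from the
/// jar's on-disk config, not from whatever ranges the filename encodes.
fn actual_block_range(file: &Path) -> Result<Option<SegmentRangeInclusive>, NippyJarError> {
    // Loading the jar reads the sidecar config file, which holds the real ranges,
    // even when the filename still reflects an older (pre-append/prune) range.
    let jar = NippyJar::<SegmentHeader>::load(file)?;
    Ok(jar.user_header().block_range().copied())
}
```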
- let jar = NippyJar::::load( - &path.as_ref().join(segment.filename(&block_range, &tx_range)), - )?; - - if &tx_range != jar.user_header().tx_range() { - // TODO(joshie): rename - } - - range_list.push(( - jar.user_header().block_range().clone(), - jar.user_header().tx_range().clone(), - )); - } - } - - Ok(static_files) -} diff --git a/crates/storage/db/src/snapshot/cursor.rs b/crates/storage/db/src/static_file/cursor.rs similarity index 77% rename from crates/storage/db/src/snapshot/cursor.rs rename to crates/storage/db/src/static_file/cursor.rs index f778b39a03ed..237cbe4518d8 100644 --- a/crates/storage/db/src/snapshot/cursor.rs +++ b/crates/storage/db/src/static_file/cursor.rs @@ -3,23 +3,23 @@ use crate::table::Decompress; use derive_more::{Deref, DerefMut}; use reth_interfaces::provider::ProviderResult; use reth_nippy_jar::{DataReader, NippyJar, NippyJarCursor}; -use reth_primitives::{snapshot::SegmentHeader, B256}; +use reth_primitives::{static_file::SegmentHeader, B256}; use std::sync::Arc; -/// Cursor of a snapshot segment. +/// Cursor of a static file segment. #[derive(Debug, Deref, DerefMut)] -pub struct SnapshotCursor<'a>(NippyJarCursor<'a, SegmentHeader>); +pub struct StaticFileCursor<'a>(NippyJarCursor<'a, SegmentHeader>); -impl<'a> SnapshotCursor<'a> { - /// Returns a new [`SnapshotCursor`]. +impl<'a> StaticFileCursor<'a> { + /// Returns a new [`StaticFileCursor`]. pub fn new(jar: &'a NippyJar, reader: Arc) -> ProviderResult { Ok(Self(NippyJarCursor::with_reader(jar, reader)?)) } /// Returns the current `BlockNumber` or `TxNumber` of the cursor depending on the kind of - /// snapshot segment. - pub fn number(&self) -> u64 { - self.row_index() + self.jar().user_header().start() + /// static file segment. + pub fn number(&self) -> Option { + self.jar().user_header().start().map(|start| self.row_index() + start) } /// Gets a row of values. @@ -28,15 +28,21 @@ impl<'a> SnapshotCursor<'a> { key_or_num: KeyOrNumber<'_>, mask: usize, ) -> ProviderResult>> { + if self.jar().rows() == 0 { + return Ok(None) + } + let row = match key_or_num { KeyOrNumber::Key(k) => self.row_by_key_with_cols(k, mask), - KeyOrNumber::Number(n) => { - let offset = self.jar().user_header().start(); - if offset > n { - return Ok(None) + KeyOrNumber::Number(n) => match self.jar().user_header().start() { + Some(offset) => { + if offset > n { + return Ok(None) + } + self.row_by_number_with_cols((n - offset) as usize, mask) } - self.row_by_number_with_cols((n - offset) as usize, mask) - } + None => Ok(None), + }, }?; Ok(row) diff --git a/crates/storage/db/src/snapshot/generation.rs b/crates/storage/db/src/static_file/generation.rs similarity index 67% rename from crates/storage/db/src/snapshot/generation.rs rename to crates/storage/db/src/static_file/generation.rs index ea1c1e65431d..0c667e1075fc 100644 --- a/crates/storage/db/src/snapshot/generation.rs +++ b/crates/storage/db/src/static_file/generation.rs @@ -10,16 +10,16 @@ use reth_nippy_jar::{ColumnResult, NippyJar, NippyJarHeader, PHFKey}; use reth_tracing::tracing::*; use std::{error::Error as StdError, ops::RangeInclusive}; -/// Macro that generates snapshot creation functions that take an arbitratry number of [`Table`] and -/// creates a [`NippyJar`] file out of their [`Table::Value`]. Each list of [`Table::Value`] from a -/// table is a column of values. +/// Macro that generates static file creation functions that take an arbitratry number of [`Table`] +/// and creates a [`NippyJar`] file out of their [`Table::Value`]. 
Each list of [`Table::Value`] +/// from a table is a column of values. /// /// Has membership filter set and compression dictionary support. -macro_rules! generate_snapshot_func { +macro_rules! generate_static_file_func { ($(($($tbl:ident),+)),+ $(,)? ) => { $( paste::item! { - /// Creates a snapshot from specified tables. Each table's `Value` iterator represents a column. + /// Creates a static file from specified tables. Each table's `Value` iterator represents a column. /// /// **Ensure the range contains the same number of rows.** /// @@ -29,9 +29,9 @@ macro_rules! generate_snapshot_func { /// * `keys`: Iterator of keys (eg. `TxHash` or `BlockHash`) with length equal to `row_count` and ordered by future column insertion from `range`. /// * `dict_compression_set`: Sets of column data for compression dictionaries. Max size is 2GB. Row count is independent. /// * `row_count`: Total rows to add to `NippyJar`. Must match row count in `range`. - /// * `nippy_jar`: Snapshot object responsible for file generation. + /// * `nippy_jar`: Static File object responsible for file generation. #[allow(non_snake_case)] - pub fn []< + pub fn []< $($tbl: Table,)+ K, H: NippyJarHeader @@ -43,27 +43,27 @@ macro_rules! generate_snapshot_func { dict_compression_set: Option>>>, keys: Option>>, row_count: usize, - nippy_jar: &mut NippyJar + mut nippy_jar: NippyJar ) -> ProviderResult<()> where K: Key + Copy { let additional = additional.unwrap_or_default(); - debug!(target: "reth::snapshot", ?range, "Creating snapshot {:?} and {} more columns.", vec![$($tbl::NAME,)+], additional.len()); + debug!(target: "reth::static_file", ?range, "Creating static file {:?} and {} more columns.", vec![$($tbl::NAME,)+], additional.len()); let range: RangeInclusive> = RawKey::new(*range.start())..=RawKey::new(*range.end()); // Create PHF and Filter if required if let Some(keys) = keys { - debug!(target: "reth::snapshot", "Calculating Filter, PHF and offset index list"); + debug!(target: "reth::static_file", "Calculating Filter, PHF and offset index list"); nippy_jar.prepare_index(keys, row_count)?; - debug!(target: "reth::snapshot", "Filter, PHF and offset index list calculated."); + debug!(target: "reth::static_file", "Filter, PHF and offset index list calculated."); } // Create compression dictionaries if required if let Some(data_sets) = dict_compression_set { - debug!(target: "reth::snapshot", "Creating compression dictionaries."); + debug!(target: "reth::static_file", "Creating compression dictionaries."); nippy_jar.prepare_compression(data_sets)?; - debug!(target: "reth::snapshot", "Compression dictionaries created."); + debug!(target: "reth::static_file", "Compression dictionaries created."); } // Creates the cursors for the columns @@ -80,17 +80,17 @@ macro_rules! generate_snapshot_func { )+ - // Create the snapshot from the data + // Create the static file from the data let col_iterators: Vec,_>>>> = vec![ $(Box::new([< $tbl _iter>]),)+ ]; - debug!(target: "reth::snapshot", jar=?nippy_jar, "Generating snapshot file."); + debug!(target: "reth::static_file", jar=?nippy_jar, "Generating static file."); - nippy_jar.freeze(col_iterators.into_iter().chain(additional).collect(), row_count as u64)?; + let nippy_jar = nippy_jar.freeze(col_iterators.into_iter().chain(additional).collect(), row_count as u64)?; - debug!(target: "reth::snapshot", jar=?nippy_jar, "Snapshot file generated."); + debug!(target: "reth::static_file", jar=?nippy_jar, "Static file generated."); Ok(()) } @@ -99,4 +99,4 @@ macro_rules! 
generate_snapshot_func { }; } -generate_snapshot_func!((T1), (T1, T2), (T1, T2, T3), (T1, T2, T3, T4), (T1, T2, T3, T4, T5),); +generate_static_file_func!((T1), (T1, T2), (T1, T2, T3), (T1, T2, T3, T4), (T1, T2, T3, T4, T5),); diff --git a/crates/storage/db/src/snapshot/mask.rs b/crates/storage/db/src/static_file/mask.rs similarity index 75% rename from crates/storage/db/src/snapshot/mask.rs rename to crates/storage/db/src/static_file/mask.rs index 7b8cb016772c..de5932ea9ac3 100644 --- a/crates/storage/db/src/snapshot/mask.rs +++ b/crates/storage/db/src/static_file/mask.rs @@ -4,14 +4,14 @@ use crate::table::Decompress; /// /// #### Explanation: /// -/// A `NippyJar` snapshot row can contain multiple column values. To specify the column values +/// A `NippyJar` static file row can contain multiple column values. To specify the column values /// to be read, a mask is utilized. /// -/// For example, a snapshot with three columns, if the first and last columns are queried, the mask -/// `0b101` would be passed. To select only the second column, the mask `0b010` would be used. +/// For example, a static file with three columns, if the first and last columns are queried, the +/// mask `0b101` would be passed. To select only the second column, the mask `0b010` would be used. /// -/// Since each snapshot has its own column distribution, different wrapper types are necessary. For -/// instance, `B256` might be the third column in the `Header` segment, while being the second +/// Since each static file has its own column distribution, different wrapper types are necessary. +/// For instance, `B256` might be the third column in the `Header` segment, while being the second /// column in another segment. Hence, `Mask` would only be applicable to one of these /// scenarios. /// @@ -24,7 +24,7 @@ macro_rules! add_segments { ($($segment:tt),+) => { paste::paste! { $( - #[doc = concat!("Mask for ", stringify!($segment), " snapshot segment. See [`Mask`] for more.")] + #[doc = concat!("Mask for ", stringify!($segment), " static file segment. See [`Mask`] for more.")] #[derive(Debug)] pub struct [<$segment Mask>](Mask); )+ @@ -37,7 +37,7 @@ add_segments!(Header, Receipt, Transaction); pub trait ColumnSelectorOne { /// First desired column value type FIRST: Decompress; - /// Mask to obtain desired values, should correspond to the order of columns in a snapshot. + /// Mask to obtain desired values, should correspond to the order of columns in a static_file. const MASK: usize; } @@ -47,7 +47,7 @@ pub trait ColumnSelectorTwo { type FIRST: Decompress; /// Second desired column value type SECOND: Decompress; - /// Mask to obtain desired values, should correspond to the order of columns in a snapshot. + /// Mask to obtain desired values, should correspond to the order of columns in a static_file. const MASK: usize; } @@ -59,13 +59,13 @@ pub trait ColumnSelectorThree { type SECOND: Decompress; /// Third desired column value type THIRD: Decompress; - /// Mask to obtain desired values, should correspond to the order of columns in a snapshot. + /// Mask to obtain desired values, should correspond to the order of columns in a static_file. const MASK: usize; } #[macro_export] -/// Add mask to select `N` column values from a specific snapshot segment row. -macro_rules! add_snapshot_mask { +/// Add mask to select `N` column values from a specific static file segment row. +macro_rules! 
add_static_file_mask { ($mask_struct:tt, $type1:ty, $mask:expr) => { impl ColumnSelectorOne for $mask_struct<$type1> { type FIRST = $type1; @@ -80,7 +80,7 @@ macro_rules! add_snapshot_mask { } }; ($mask_struct:tt, $type1:ty, $type2:ty, $type3:ty, $mask:expr) => { - impl ColumnSelectorTwo for $mask_struct<$type1, $type2, $type3> { + impl ColumnSelectorThree for $mask_struct<$type1, $type2, $type3> { type FIRST = $type1; type SECOND = $type2; type THIRD = $type3; diff --git a/crates/storage/db/src/static_file/masks.rs b/crates/storage/db/src/static_file/masks.rs new file mode 100644 index 000000000000..ab3e0d99e12c --- /dev/null +++ b/crates/storage/db/src/static_file/masks.rs @@ -0,0 +1,21 @@ +use super::{ReceiptMask, TransactionMask}; +use crate::{ + add_static_file_mask, + static_file::mask::{ColumnSelectorOne, ColumnSelectorTwo, HeaderMask}, + table::Table, + HeaderTerminalDifficulties, Receipts, Transactions, +}; +use reth_primitives::{BlockHash, Header}; + +// HEADER MASKS +add_static_file_mask!(HeaderMask, Header, 0b001); +add_static_file_mask!(HeaderMask, ::Value, 0b010); +add_static_file_mask!(HeaderMask, BlockHash, 0b100); +add_static_file_mask!(HeaderMask, Header, BlockHash, 0b101); +add_static_file_mask!(HeaderMask, ::Value, BlockHash, 0b110); + +// RECEIPT MASKS +add_static_file_mask!(ReceiptMask, ::Value, 0b1); + +// TRANSACTION MASKS +add_static_file_mask!(TransactionMask, ::Value, 0b1); diff --git a/crates/storage/db/src/static_file/mod.rs b/crates/storage/db/src/static_file/mod.rs new file mode 100644 index 000000000000..eed27e0de954 --- /dev/null +++ b/crates/storage/db/src/static_file/mod.rs @@ -0,0 +1,76 @@ +//! reth's static file database table import and access + +mod generation; +use std::{ + collections::{hash_map::Entry, HashMap}, + path::Path, +}; + +pub use generation::*; + +mod cursor; +pub use cursor::StaticFileCursor; + +mod mask; +pub use mask::*; +use reth_nippy_jar::{NippyJar, NippyJarError}; +use reth_primitives::{ + static_file::{SegmentHeader, SegmentRangeInclusive}, + StaticFileSegment, +}; + +mod masks; + +/// Alias type for a map of [`StaticFileSegment`] and sorted lists of existing static file ranges. +type SortedStaticFiles = + HashMap)>>; + +/// Given the static_files directory path, it returns a list over the existing static_files +/// organized by [`StaticFileSegment`]. Each segment has a sorted list of block ranges and +/// transaction ranges as presented in the file configuration. +pub fn iter_static_files(path: impl AsRef) -> Result { + let path = path.as_ref(); + if !path.exists() { + reth_primitives::fs::create_dir_all(path) + .map_err(|err| NippyJarError::Custom(err.to_string()))?; + } + + let mut static_files = SortedStaticFiles::default(); + let entries = reth_primitives::fs::read_dir(path) + .map_err(|err| NippyJarError::Custom(err.to_string()))? 
+ .filter_map(Result::ok) + .collect::>(); + + for entry in entries { + if entry.metadata().map_or(false, |metadata| metadata.is_file()) { + if let Some((segment, _)) = + StaticFileSegment::parse_filename(&entry.file_name().to_string_lossy()) + { + let jar = NippyJar::::load(&entry.path())?; + + let (block_range, tx_range) = ( + jar.user_header().block_range().copied(), + jar.user_header().tx_range().copied(), + ); + + if let Some(block_range) = block_range { + match static_files.entry(segment) { + Entry::Occupied(mut entry) => { + entry.get_mut().push((block_range, tx_range)); + } + Entry::Vacant(entry) => { + entry.insert(vec![(block_range, tx_range)]); + } + } + } + } + } + } + + for (_, range_list) in static_files.iter_mut() { + // Sort by block end range. + range_list.sort_by(|a, b| a.0.end().cmp(&b.0.end())); + } + + Ok(static_files) +} diff --git a/crates/storage/db/src/tables/codecs/compact.rs b/crates/storage/db/src/tables/codecs/compact.rs index f31e61026e65..38722eb49036 100644 --- a/crates/storage/db/src/tables/codecs/compact.rs +++ b/crates/storage/db/src/tables/codecs/compact.rs @@ -29,6 +29,7 @@ macro_rules! impl_compression_for_compact { } impl_compression_for_compact!( + SealedHeader, Header, Account, Log, diff --git a/crates/storage/db/src/tables/mod.rs b/crates/storage/db/src/tables/mod.rs index 3ccce8a9e1b9..3f3c6bf176a3 100644 --- a/crates/storage/db/src/tables/mod.rs +++ b/crates/storage/db/src/tables/mod.rs @@ -246,7 +246,7 @@ tables! { table CanonicalHeaders; /// Stores the total difficulty from a block header. - table HeaderTD; + table HeaderTerminalDifficulties; /// Stores the block number corresponding to a header. table HeaderNumbers; @@ -269,12 +269,12 @@ tables! { table Transactions; /// Stores the mapping of the transaction hash to the transaction number. - table TxHashNumber; + table TransactionHashNumbers; /// Stores the mapping of transaction number to the blocks number. /// /// The key is the highest transaction ID in the block. - table TransactionBlock; + table TransactionBlocks; /// Canonical only Stores transaction receipts. table Receipts; @@ -309,7 +309,7 @@ tables! { /// * If there were no shard we would get `None` entry or entry of different storage key. /// /// Code example can be found in `reth_provider::HistoricalStateProviderRef` - table AccountHistory, Value = BlockNumberList>; + table AccountsHistory, Value = BlockNumberList>; /// Stores pointers to block number changeset with changes for each storage key. /// @@ -329,29 +329,29 @@ tables! { /// * If there were no shard we would get `None` entry or entry of different storage key. /// /// Code example can be found in `reth_provider::HistoricalStateProviderRef` - table StorageHistory; + table StoragesHistory; /// Stores the state of an account before a certain transaction changed it. /// Change on state can be: account is created, selfdestructed, touched while empty /// or changed balance,nonce. - table AccountChangeSet; + table AccountChangeSets; /// Stores the state of a storage key before a certain transaction changed it. /// If [`StorageEntry::value`] is zero, this means storage was not existing /// and needs to be removed. - table StorageChangeSet; + table StorageChangeSets; /// Stores the current state of an [`Account`] indexed with `keccak256Address` /// This table is in preparation for merkelization and calculation of state root. /// We are saving whole account data as it is needed for partial update when /// part of storage is changed. 
Benefit for merkelization is that hashed addresses are sorted. - table HashedAccount; + table HashedAccounts; /// Stores the current storage values indexed with `keccak256Address` and /// hash of storage key `keccak256key`. /// This table is in preparation for merkelization and calculation of state root. /// Benefit for merklization is that hashed addresses/keys are sorted. - table HashedStorage; + table HashedStorages; /// Stores the current state's Merkle Patricia Tree. table AccountsTrie; @@ -362,13 +362,13 @@ tables! { /// Stores the transaction sender for each canonical transaction. /// It is needed to speed up execution stage and allows fetching signer without doing /// transaction signed recovery - table TxSenders; + table TransactionSenders; /// Stores the highest synced block number and stage-specific checkpoint of each stage. - table SyncStage; + table StageCheckpoints; /// Stores arbitrary data to keep track of a stage first-sync progress. - table SyncStageProgress>; + table StageCheckpointProgresses>; /// Stores the highest pruned block number and prune mode of each prune segment. table PruneCheckpoints; diff --git a/crates/storage/db/src/tables/models/accounts.rs b/crates/storage/db/src/tables/models/accounts.rs index 767f321d9023..9b926c0203be 100644 --- a/crates/storage/db/src/tables/models/accounts.rs +++ b/crates/storage/db/src/tables/models/accounts.rs @@ -11,7 +11,7 @@ use reth_codecs::{derive_arbitrary, Compact}; use reth_primitives::{Account, Address, BlockNumber, Buf}; use serde::{Deserialize, Serialize}; -/// Account as it is saved inside [`AccountChangeSet`][crate::tables::AccountChangeSet]. +/// Account as it is saved inside [`AccountChangeSets`][crate::tables::AccountChangeSets]. /// /// [`Address`] is the subkey. #[derive_arbitrary(compact)] @@ -57,7 +57,7 @@ impl Compact for AccountBeforeTx { } /// [`BlockNumber`] concatenated with [`Address`]. Used as the key for -/// [`StorageChangeSet`](crate::tables::StorageChangeSet) +/// [`StorageChangeSets`](crate::tables::StorageChangeSets) /// /// Since it's used as a key, it isn't compressed when encoding it. #[derive( diff --git a/crates/storage/db/src/tables/raw.rs b/crates/storage/db/src/tables/raw.rs index 58c6b4e06216..90d4b96aec1f 100644 --- a/crates/storage/db/src/tables/raw.rs +++ b/crates/storage/db/src/tables/raw.rs @@ -53,6 +53,12 @@ impl RawKey { Self { key: K::encode(key).into(), _phantom: std::marker::PhantomData } } + /// Creates a raw key from an existing `Vec`. Useful when we already have the encoded + /// key. + pub fn from_vec(vec: Vec) -> Self { + Self { key: vec, _phantom: std::marker::PhantomData } + } + /// Returns the decoded value. pub fn key(&self) -> Result { K::decode(&self.key) @@ -112,6 +118,12 @@ impl RawValue { Self { value: V::compress(value).into(), _phantom: std::marker::PhantomData } } + /// Creates a raw value from an existing `Vec`. Useful when we already have the encoded + /// value. + pub fn from_vec(vec: Vec) -> Self { + Self { value: vec, _phantom: std::marker::PhantomData } + } + /// Returns the decompressed value. pub fn value(&self) -> Result { V::decompress(&self.value) diff --git a/crates/storage/db/src/version.rs b/crates/storage/db/src/version.rs index 63357b8e9fba..c7cebaed8a10 100644 --- a/crates/storage/db/src/version.rs +++ b/crates/storage/db/src/version.rs @@ -8,8 +8,8 @@ use std::{ /// The name of the file that contains the version of the database. 
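The new `RawKey::from_vec` / `RawValue::from_vec` constructors above let callers wrap bytes that are already in database encoding (for example, output of an ETL sort) without a decode/re-encode round trip. A hedged sketch: the `reth_db` re-export paths and the `u64`/`Header` pairing are assumptions for illustration; only `from_vec`, `key()` and `value()` come from this diff.

```rust
use reth_db::{RawKey, RawValue};
use reth_interfaces::db::DatabaseError;
use reth_primitives::Header;

fn reuse_encoded_bytes(
    encoded_key: Vec<u8>,
    compressed_value: Vec<u8>,
) -> Result<(u64, Header), DatabaseError> {
    // Wrap bytes that are already in database format: no decode/re-encode round trip.
    let key: RawKey<u64> = RawKey::from_vec(encoded_key);
    let value: RawValue<Header> = RawValue::from_vec(compressed_value);

    // The typed forms remain reachable when they are actually needed.
    Ok((key.key()?, value.value()?))
}
```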
pub const DB_VERSION_FILE_NAME: &str = "database.version"; /// The version of the database stored in the [DB_VERSION_FILE_NAME] file in the same directory as -/// database. Example: `1`. -pub const DB_VERSION: u64 = 1; +/// database. +pub const DB_VERSION: u64 = 2; /// Error when checking a database version using [check_db_version_file] #[derive(thiserror::Error, Debug)] diff --git a/crates/storage/libmdbx-rs/Cargo.toml b/crates/storage/libmdbx-rs/Cargo.toml index e06eb089acc0..2330b6f79e47 100644 --- a/crates/storage/libmdbx-rs/Cargo.toml +++ b/crates/storage/libmdbx-rs/Cargo.toml @@ -17,13 +17,13 @@ name = "reth_libmdbx" [dependencies] bitflags.workspace = true byteorder = "1" -derive_more = "0.99" +derive_more.workspace = true indexmap = "2" libc = "0.2" parking_lot.workspace = true thiserror.workspace = true dashmap = { version = "5.5.3", features = ["inline"], optional = true } -tracing = { workspace = true, optional = true } +tracing.workspace = true ffi = { package = "reth-mdbx-sys", path = "./mdbx-sys" } @@ -33,7 +33,7 @@ libffi = "3.2.0" [features] default = [] return-borrowed = [] -read-tx-timeouts = ["dashmap", "dashmap/inline", "tracing"] +read-tx-timeouts = ["dashmap", "dashmap/inline"] [dev-dependencies] pprof = { workspace = true, features = ["flamegraph", "frame-pointer", "criterion"] } diff --git a/crates/storage/libmdbx-rs/src/environment.rs b/crates/storage/libmdbx-rs/src/environment.rs index fd1330210082..91bf80edbf3e 100644 --- a/crates/storage/libmdbx-rs/src/environment.rs +++ b/crates/storage/libmdbx-rs/src/environment.rs @@ -20,6 +20,7 @@ use std::{ thread::sleep, time::Duration, }; +use tracing::warn; /// The default maximum duration of a read transaction. #[cfg(feature = "read-tx-timeouts")] @@ -96,6 +97,7 @@ impl Environment { /// Create a read-write transaction for use with the environment. This method will block while /// there are any other read-write transactions open on the environment. 
pub fn begin_rw_txn(&self) -> Result> { + let mut warned = false; let txn = loop { let (tx, rx) = sync_channel(0); self.txn_manager().send_message(TxnManagerMessage::Begin { @@ -105,6 +107,10 @@ impl Environment { }); let res = rx.recv().unwrap(); if let Err(Error::Busy) = &res { + if !warned { + warned = true; + warn!(target: "libmdbx", "Process stalled, awaiting read-write transaction lock."); + } sleep(Duration::from_millis(250)); continue } @@ -937,7 +943,8 @@ mod tests { .open(tempdir.path()) .unwrap(); - // Insert some data in the database, so the read transaction can lock on the snapshot of it + // Insert some data in the database, so the read transaction can lock on the static file of + // it { let tx = env.begin_rw_txn().unwrap(); let db = tx.open_db(None).unwrap(); @@ -950,7 +957,8 @@ mod tests { // Create a read transaction let _tx_ro = env.begin_ro_txn().unwrap(); - // Change previously inserted data, so the read transaction would use the previous snapshot + // Change previously inserted data, so the read transaction would use the previous static + // file { let tx = env.begin_rw_txn().unwrap(); let db = tx.open_db(None).unwrap(); @@ -961,7 +969,7 @@ mod tests { } // Insert more data in the database, so we hit the DB size limit error, and MDBX tries to - // kick long-lived readers and delete their snapshots + // kick long-lived readers and delete their static_files { let tx = env.begin_rw_txn().unwrap(); let db = tx.open_db(None).unwrap(); diff --git a/crates/storage/libmdbx-rs/src/flags.rs b/crates/storage/libmdbx-rs/src/flags.rs index ad88c1fbedc9..f984ffcaf024 100644 --- a/crates/storage/libmdbx-rs/src/flags.rs +++ b/crates/storage/libmdbx-rs/src/flags.rs @@ -25,7 +25,7 @@ pub enum SyncMode { /// /// [SyncMode::UtterlyNoSync] the [SyncMode::SafeNoSync] flag disable similarly flush system /// buffers to disk when committing a transaction. But there is a huge difference in how - /// are recycled the MVCC snapshots corresponding to previous "steady" transactions (see + /// are recycled the MVCC static_files corresponding to previous "steady" transactions (see /// below). 
/// /// With [crate::EnvironmentKind::WriteMap] the [SyncMode::SafeNoSync] instructs MDBX to use diff --git a/crates/storage/nippy-jar/Cargo.toml b/crates/storage/nippy-jar/Cargo.toml index fb7fc4ae7d9e..7ed18e6a6598 100644 --- a/crates/storage/nippy-jar/Cargo.toml +++ b/crates/storage/nippy-jar/Cargo.toml @@ -15,6 +15,8 @@ workspace = true name = "reth_nippy_jar" [dependencies] +# reth +reth-primitives.workspace = true # filter ph = "0.8.0" @@ -33,7 +35,7 @@ serde = { version = "1.0", features = ["derive"] } tracing = "0.1.0" anyhow = "1.0" thiserror.workspace = true -derive_more = "0.99" +derive_more.workspace = true [dev-dependencies] rand = { version = "0.8", features = ["small_rng"] } diff --git a/crates/storage/nippy-jar/src/error.rs b/crates/storage/nippy-jar/src/error.rs index 760a9446ddfd..c769f0db8630 100644 --- a/crates/storage/nippy-jar/src/error.rs +++ b/crates/storage/nippy-jar/src/error.rs @@ -7,6 +7,8 @@ pub enum NippyJarError { Internal(#[from] Box), #[error(transparent)] Disconnect(#[from] std::io::Error), + #[error(transparent)] + FileSystem(#[from] reth_primitives::fs::FsPathError), #[error("{0}")] Custom(String), #[error(transparent)] diff --git a/crates/storage/nippy-jar/src/lib.rs b/crates/storage/nippy-jar/src/lib.rs index 46b7dc26bc68..f7b0c7b31a7d 100644 --- a/crates/storage/nippy-jar/src/lib.rs +++ b/crates/storage/nippy-jar/src/lib.rs @@ -206,6 +206,16 @@ impl NippyJar { &self.user_header } + /// Gets total columns in jar. + pub fn columns(&self) -> usize { + self.columns + } + + /// Gets total rows in jar. + pub fn rows(&self) -> usize { + self.rows + } + /// Returns the size of inclusion filter pub fn filter_size(&self) -> usize { self.size() @@ -232,7 +242,9 @@ impl NippyJar { /// **The user must ensure the header type matches the one used during the jar's creation.** pub fn load(path: &Path) -> Result { // Read [`Self`] located at the data file. - let config_file = File::open(path.with_extension(CONFIG_FILE_EXTENSION))?; + let config_path = path.with_extension(CONFIG_FILE_EXTENSION); + let config_file = File::open(&config_path) + .map_err(|err| reth_primitives::fs::FsPathError::open(err, config_path))?; let mut obj: Self = bincode::deserialize_from(&config_file)?; obj.path = path.to_path_buf(); @@ -269,6 +281,21 @@ impl NippyJar { self.path.with_extension(CONFIG_FILE_EXTENSION) } + /// Deletes from disk this [`NippyJar`] alongside every satellite file. + pub fn delete(self) -> Result<(), NippyJarError> { + // TODO(joshie): ensure consistency on unexpected shutdown + + for path in + [self.data_path().into(), self.index_path(), self.offsets_path(), self.config_path()] + { + if path.exists() { + reth_primitives::fs::remove_file(path)?; + } + } + + Ok(()) + } + /// Returns a [`DataReader`] of the data and offset file pub fn open_data_reader(&self) -> Result { DataReader::new(self.data_path()) @@ -338,14 +365,17 @@ impl NippyJar { /// Writes all data and configuration to a file and the offset index to another. 
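The new `NippyJar::delete` above removes the data file together with every satellite file (index, offsets, config). A minimal sketch of how it might be used, assuming the `load_without_header` constructor that the tests in this diff rely on:

```rust
use reth_nippy_jar::{NippyJar, NippyJarError};
use std::path::Path;

fn drop_static_file(data_path: &Path) -> Result<(), NippyJarError> {
    let jar = NippyJar::load_without_header(data_path)?;
    // Consumes the jar and removes the data, index, offsets and config files
    // that belong to it; satellite files that don't exist are simply skipped.
    jar.delete()
}
```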
pub fn freeze( - &mut self, + mut self, columns: Vec>>>, total_rows: u64, - ) -> Result<(), NippyJarError> { + ) -> Result { self.check_before_freeze(&columns)?; debug!(target: "nippy-jar", path=?self.data_path(), "Opening data file."); + // Write phf, filter and offset index to file + self.freeze_filters()?; + // Creates the writer, data and offsets file let mut writer = NippyJarWriter::new(self)?; @@ -355,12 +385,9 @@ impl NippyJar { // Flushes configuration and offsets to disk writer.commit()?; - // Write phf, filter and offset index to file - self.freeze_filters()?; - - debug!(target: "nippy-jar", jar=?self, "Finished writing data."); + debug!(target: "nippy-jar", ?writer, "Finished writing data."); - Ok(()) + Ok(writer.into_jar()) } /// Freezes [`PerfectHashingFunction`], [`InclusionFilter`] and the offset index to file. @@ -428,9 +455,9 @@ impl PerfectHashingFunction for NippyJar { } } -/// Manages the reading of snapshot data using memory-mapped files. +/// Manages the reading of static file data using memory-mapped files. /// -/// Holds file and mmap descriptors of the data and offsets files of a snapshot. +/// Holds file and mmap descriptors of the data and offsets files of a static_file. #[derive(Debug)] pub struct DataReader { /// Data file descriptor. Needs to be kept alive as long as `data_mmap` handle. @@ -558,15 +585,21 @@ mod tests { let num_rows = col1.len() as u64; let file_path = tempfile::NamedTempFile::new().unwrap(); - let mut nippy = NippyJar::new_without_header(num_columns, file_path.path()); - assert!(matches!(NippyJar::set_keys(&mut nippy, &col1), Err(NippyJarError::PHFMissing))); + let create_nippy = || -> NippyJar<()> { + let mut nippy = NippyJar::new_without_header(num_columns, file_path.path()); + assert!(matches!( + NippyJar::set_keys(&mut nippy, &col1), + Err(NippyJarError::PHFMissing) + )); + nippy + }; - let check_phf = |nippy: &mut NippyJar<_>| { + let check_phf = |mut nippy: NippyJar<_>| { assert!(matches!( - NippyJar::get_index(nippy, &col1[0]), + NippyJar::get_index(&nippy, &col1[0]), Err(NippyJarError::PHFMissingKeys) )); - assert!(NippyJar::set_keys(nippy, &col1).is_ok()); + assert!(NippyJar::set_keys(&mut nippy, &col1).is_ok()); let collect_indexes = |nippy: &NippyJar<_>| -> Vec { col1.iter() @@ -575,12 +608,12 @@ mod tests { }; // Ensure all indexes are unique - let indexes = collect_indexes(nippy); + let indexes = collect_indexes(&nippy); assert_eq!(indexes.iter().collect::>().len(), indexes.len()); // Ensure reproducibility - assert!(NippyJar::set_keys(nippy, &col1).is_ok()); - assert_eq!(indexes, collect_indexes(nippy)); + assert!(NippyJar::set_keys(&mut nippy, &col1).is_ok()); + assert_eq!(indexes, collect_indexes(&nippy)); // Ensure that loaded phf provides the same function outputs nippy.prepare_index(clone_with_result(&col1), col1.len()).unwrap(); @@ -593,12 +626,10 @@ mod tests { }; // fmph bytes size for 100 values of 32 bytes: 54 - nippy = nippy.with_fmph(); - check_phf(&mut nippy); + check_phf(create_nippy().with_fmph()); // fmph bytes size for 100 values of 32 bytes: 46 - nippy = nippy.with_gofmph(); - check_phf(&mut nippy); + check_phf(create_nippy().with_gofmph()); } #[test] @@ -631,7 +662,9 @@ mod tests { assert!(InclusionFilter::add(&mut nippy, &col1[2]).is_ok()); assert!(InclusionFilter::add(&mut nippy, &col1[3]).is_ok()); - nippy.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows).unwrap(); + let nippy = nippy + .freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows) + .unwrap(); let mut 
loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap(); loaded_nippy.load_filters().unwrap(); @@ -675,6 +708,10 @@ mod tests { Err(NippyJarError::CompressorNotReady) )); + let mut nippy = + NippyJar::new_without_header(num_columns, file_path.path()).with_zstd(true, 5000); + assert!(nippy.compressor().is_some()); + nippy.prepare_compression(vec![col1.clone(), col2.clone()]).unwrap(); if let Some(Compressors::Zstd(zstd)) = &nippy.compressor() { @@ -684,7 +721,9 @@ mod tests { )); } - nippy.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows).unwrap(); + let nippy = nippy + .freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows) + .unwrap(); let mut loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap(); loaded_nippy.load_filters().unwrap(); @@ -724,10 +763,12 @@ mod tests { let nippy = NippyJar::new_without_header(num_columns, file_path.path()); assert!(nippy.compressor().is_none()); - let mut nippy = NippyJar::new_without_header(num_columns, file_path.path()).with_lz4(); + let nippy = NippyJar::new_without_header(num_columns, file_path.path()).with_lz4(); assert!(nippy.compressor().is_some()); - nippy.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows).unwrap(); + let nippy = nippy + .freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows) + .unwrap(); let mut loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap(); loaded_nippy.load_filters().unwrap(); @@ -760,11 +801,13 @@ mod tests { let nippy = NippyJar::new_without_header(num_columns, file_path.path()); assert!(nippy.compressor().is_none()); - let mut nippy = + let nippy = NippyJar::new_without_header(num_columns, file_path.path()).with_zstd(false, 5000); assert!(nippy.compressor().is_some()); - nippy.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows).unwrap(); + let nippy = nippy + .freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows) + .unwrap(); let mut loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap(); loaded_nippy.load_filters().unwrap(); @@ -903,7 +946,7 @@ mod tests { let mut data = col1.iter().zip(col2.iter()).enumerate().collect::>(); data.shuffle(&mut rand::thread_rng()); - // Imagine `Blocks` snapshot file has two columns: `Block | StoredWithdrawals` + // Imagine `Blocks` static file has two columns: `Block | StoredWithdrawals` const BLOCKS_FULL_MASK: usize = 0b11; // Read both columns @@ -1047,7 +1090,7 @@ mod tests { col1: &[Vec], col2: &[Vec], ) { - let mut nippy = NippyJar::load_without_header(file_path).unwrap(); + let nippy = NippyJar::load_without_header(file_path).unwrap(); // Set the baseline that should be unwinded to let initial_rows = nippy.rows; @@ -1059,7 +1102,7 @@ mod tests { assert!(initial_offset_size > 0); // Appends a third row - let mut writer = NippyJarWriter::new(&mut nippy).unwrap(); + let mut writer = NippyJarWriter::new(nippy).unwrap(); writer.append_column(Some(Ok(&col1[2]))).unwrap(); writer.append_column(Some(Ok(&col2[2]))).unwrap(); @@ -1073,7 +1116,7 @@ mod tests { // Simulate an unexpected shutdown of the writer, before it can finish commit() drop(writer); - let mut nippy = NippyJar::load_without_header(file_path).unwrap(); + let nippy = NippyJar::load_without_header(file_path).unwrap(); assert_eq!(initial_rows, nippy.rows); // Data was written successfuly @@ -1090,21 +1133,20 @@ mod tests { // Writer will execute a consistency check and verify first that the offset list on disk // doesn't match 
the nippy.rows, and prune it. Then, it will prune the data file // accordingly as well. - let _writer = NippyJarWriter::new(&mut nippy).unwrap(); - assert_eq!(initial_rows, nippy.rows); + let writer = NippyJarWriter::new(nippy).unwrap(); + assert_eq!(initial_rows, writer.rows()); assert_eq!( initial_offset_size, - File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len() as usize + File::open(writer.offsets_path()).unwrap().metadata().unwrap().len() as usize ); assert_eq!( initial_data_size, - File::open(nippy.data_path()).unwrap().metadata().unwrap().len() as usize + File::open(writer.data_path()).unwrap().metadata().unwrap().len() as usize ); - assert_eq!(initial_rows, nippy.rows); } fn test_append_consistency_no_commit(file_path: &Path, col1: &[Vec], col2: &[Vec]) { - let mut nippy = NippyJar::load_without_header(file_path).unwrap(); + let nippy = NippyJar::load_without_header(file_path).unwrap(); // Set the baseline that should be unwinded to let initial_rows = nippy.rows; @@ -1117,14 +1159,14 @@ mod tests { // Appends a third row, so we have an offset list in memory, which is not flushed to disk, // while the data has been. - let mut writer = NippyJarWriter::new(&mut nippy).unwrap(); + let mut writer = NippyJarWriter::new(nippy).unwrap(); writer.append_column(Some(Ok(&col1[2]))).unwrap(); writer.append_column(Some(Ok(&col2[2]))).unwrap(); // Simulate an unexpected shutdown of the writer, before it can call commit() drop(writer); - let mut nippy = NippyJar::load_without_header(file_path).unwrap(); + let nippy = NippyJar::load_without_header(file_path).unwrap(); assert_eq!(initial_rows, nippy.rows); // Data was written successfuly @@ -1140,13 +1182,12 @@ mod tests { // Writer will execute a consistency check and verify that the data file has more data than // it should, and resets it to the last offset of the list (on disk here) - let _writer = NippyJarWriter::new(&mut nippy).unwrap(); - assert_eq!(initial_rows, nippy.rows); + let writer = NippyJarWriter::new(nippy).unwrap(); + assert_eq!(initial_rows, writer.rows()); assert_eq!( initial_data_size, - File::open(nippy.data_path()).unwrap().metadata().unwrap().len() as usize + File::open(writer.data_path()).unwrap().metadata().unwrap().len() as usize ); - assert_eq!(initial_rows, nippy.rows); } fn append_two_rows(num_columns: usize, file_path: &Path, col1: &[Vec], col2: &[Vec]) { @@ -1157,7 +1198,7 @@ mod tests { assert_eq!(nippy.max_row_size, 0); assert_eq!(nippy.rows, 0); - let mut writer = NippyJarWriter::new(&mut nippy).unwrap(); + let mut writer = NippyJarWriter::new(nippy).unwrap(); assert_eq!(writer.column(), 0); writer.append_column(Some(Ok(&col1[0]))).unwrap(); @@ -1173,26 +1214,26 @@ mod tests { let expected_data_file_size = *writer.offsets().last().unwrap(); writer.commit().unwrap(); - assert_eq!(nippy.max_row_size, col1[0].len() + col2[0].len()); - assert_eq!(nippy.rows, 1); + assert_eq!(writer.max_row_size(), col1[0].len() + col2[0].len()); + assert_eq!(writer.rows(), 1); assert_eq!( - File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len(), + File::open(writer.offsets_path()).unwrap().metadata().unwrap().len(), 1 + num_columns as u64 * 8 + 8 ); assert_eq!( - File::open(nippy.data_path()).unwrap().metadata().unwrap().len(), + File::open(writer.data_path()).unwrap().metadata().unwrap().len(), expected_data_file_size ); } // Load and add 1 row { - let mut nippy = NippyJar::load_without_header(file_path).unwrap(); + let nippy = NippyJar::load_without_header(file_path).unwrap(); // Check if it was committed 
successfuly assert_eq!(nippy.max_row_size, col1[0].len() + col2[0].len()); assert_eq!(nippy.rows, 1); - let mut writer = NippyJarWriter::new(&mut nippy).unwrap(); + let mut writer = NippyJarWriter::new(nippy).unwrap(); assert_eq!(writer.column(), 0); writer.append_column(Some(Ok(&col1[1]))).unwrap(); @@ -1208,22 +1249,22 @@ mod tests { let expected_data_file_size = *writer.offsets().last().unwrap(); writer.commit().unwrap(); - assert_eq!(nippy.max_row_size, col1[0].len() + col2[0].len()); - assert_eq!(nippy.rows, 2); + assert_eq!(writer.max_row_size(), col1[0].len() + col2[0].len()); + assert_eq!(writer.rows(), 2); assert_eq!( - File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len(), - 1 + nippy.rows as u64 * num_columns as u64 * 8 + 8 + File::open(writer.offsets_path()).unwrap().metadata().unwrap().len(), + 1 + writer.rows() as u64 * num_columns as u64 * 8 + 8 ); assert_eq!( - File::open(nippy.data_path()).unwrap().metadata().unwrap().len(), + File::open(writer.data_path()).unwrap().metadata().unwrap().len(), expected_data_file_size ); } } fn prune_rows(num_columns: usize, file_path: &Path, col1: &[Vec], col2: &[Vec]) { - let mut nippy = NippyJar::load_without_header(file_path).unwrap(); - let mut writer = NippyJarWriter::new(&mut nippy).unwrap(); + let nippy = NippyJar::load_without_header(file_path).unwrap(); + let mut writer = NippyJarWriter::new(nippy).unwrap(); // Appends a third row, so we have an offset list in memory, which is not flushed to disk writer.append_column(Some(Ok(&col1[2]))).unwrap(); @@ -1231,32 +1272,38 @@ mod tests { // This should prune from the on-memory offset list and ondisk offset list writer.prune_rows(2).unwrap(); - assert_eq!(nippy.rows, 1); + assert_eq!(writer.rows(), 1); assert_eq!( - File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len(), - 1 + nippy.rows as u64 * num_columns as u64 * 8 + 8 + File::open(writer.offsets_path()).unwrap().metadata().unwrap().len(), + 1 + writer.rows() as u64 * num_columns as u64 * 8 + 8 ); let expected_data_size = col1[0].len() + col2[0].len(); assert_eq!( - File::open(nippy.data_path()).unwrap().metadata().unwrap().len() as usize, + File::open(writer.data_path()).unwrap().metadata().unwrap().len() as usize, expected_data_size ); - let data_reader = nippy.open_data_reader().unwrap(); - // there are only two valid offsets. so index 2 actually represents the expected file - // data size. - assert_eq!(data_reader.offset(2), expected_data_size as u64); + let nippy = NippyJar::load_without_header(file_path).unwrap(); + { + let data_reader = nippy.open_data_reader().unwrap(); + // there are only two valid offsets. so index 2 actually represents the expected file + // data size. + assert_eq!(data_reader.offset(2), expected_data_size as u64); + } // This should prune from the ondisk offset list and clear the jar. 
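The size assertions above all follow the same offsets-file layout: one leading byte recording the offset width, eight bytes per (row, column) offset, and one trailing eight-byte offset equal to the data file length. A small worked example; the helper is illustrative, and only `OFFSET_SIZE_BYTES = 8` comes from this diff:

```rust
const OFFSET_SIZE_BYTES: u64 = 8;

/// 1 byte for the offset width + 8 bytes per (row, column) + 8 bytes for the
/// trailing offset that records the data file length.
fn expected_offsets_file_len(rows: u64, columns: u64) -> u64 {
    1 + rows * columns * OFFSET_SIZE_BYTES + OFFSET_SIZE_BYTES
}

#[test]
fn two_rows_two_columns() {
    // 2 rows x 2 columns -> 1 + 4 * 8 + 8 = 41 bytes.
    assert_eq!(expected_offsets_file_len(2, 2), 41);
}
```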
- let mut writer = NippyJarWriter::new(&mut nippy).unwrap(); + let mut writer = NippyJarWriter::new(nippy).unwrap(); writer.prune_rows(1).unwrap(); - assert_eq!(nippy.rows, 0); - assert_eq!(nippy.max_row_size, 0); - assert_eq!(File::open(nippy.data_path()).unwrap().metadata().unwrap().len() as usize, 0); + assert_eq!(writer.rows(), 0); + assert_eq!(writer.max_row_size(), 0); + assert_eq!(File::open(writer.data_path()).unwrap().metadata().unwrap().len() as usize, 0); // Only the byte that indicates how many bytes per offset should be left - assert_eq!(File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len() as usize, 1); + assert_eq!( + File::open(writer.offsets_path()).unwrap().metadata().unwrap().len() as usize, + 1 + ); } fn simulate_interrupted_prune( @@ -1265,7 +1312,7 @@ mod tests { num_rows: u64, missing_offsets: u64, ) { - let mut nippy = NippyJar::load_without_header(file_path).unwrap(); + let nippy = NippyJar::load_without_header(file_path).unwrap(); let reader = nippy.open_data_reader().unwrap(); let offsets_file = OpenOptions::new().read(true).write(true).open(nippy.offsets_path()).unwrap(); @@ -1284,6 +1331,6 @@ mod tests { data_file.set_len(data_len - 32 * missing_offsets).unwrap(); // runs the consistency check. - let _ = NippyJarWriter::new(&mut nippy).unwrap(); + let _ = NippyJarWriter::new(nippy).unwrap(); } } diff --git a/crates/storage/nippy-jar/src/writer.rs b/crates/storage/nippy-jar/src/writer.rs index b8de6454c27d..8ab8bd47b4b7 100644 --- a/crates/storage/nippy-jar/src/writer.rs +++ b/crates/storage/nippy-jar/src/writer.rs @@ -1,9 +1,8 @@ use crate::{compression::Compression, ColumnResult, NippyJar, NippyJarError, NippyJarHeader}; use std::{ cmp::Ordering, - fmt, fs::{File, OpenOptions}, - io::{Read, Seek, SeekFrom, Write}, + io::{BufWriter, Read, Seek, SeekFrom, Write}, path::Path, }; @@ -23,14 +22,15 @@ const OFFSET_SIZE_BYTES: u64 = 8; /// /// ## Data file layout /// The data file is represented just as a sequence of bytes of data without any delimiters -pub struct NippyJarWriter<'a, H> { - /// Reference to the associated [`NippyJar`], containing all necessary configurations for data +#[derive(Debug)] +pub struct NippyJarWriter { + /// Associated [`NippyJar`], containing all necessary configurations for data /// handling. - jar: &'a mut NippyJar, + jar: NippyJar, /// File handle to where the data is stored. - data_file: File, + data_file: BufWriter, /// File handle to where the offsets are stored. - offsets_file: File, + offsets_file: BufWriter, /// Temporary buffer to reuse when compressing data. tmp_buf: Vec, /// Used to find the maximum uncompressed size of a row in a jar. @@ -41,21 +41,19 @@ pub struct NippyJarWriter<'a, H> { column: usize, } -impl fmt::Debug for NippyJarWriter<'_, H> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("NippyJarWriter").finish_non_exhaustive() - } -} - -impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { - pub fn new(jar: &'a mut NippyJar) -> Result { +impl NippyJarWriter { + /// Creates a [`NippyJarWriter`] from [`NippyJar`]. 
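With the struct change above, `NippyJarWriter` owns its `NippyJar` (and wraps the file handles in `BufWriter`), so the typical flow becomes load → write → commit → hand the jar back. A sketch under assumptions: the crate-root re-export of `NippyJarWriter` and the `append_column` call shape mirror the tests in this diff, and `into_jar` is the accessor added a few hunks below.

```rust
use reth_nippy_jar::{NippyJar, NippyJarError, NippyJarWriter};
use std::path::Path;

fn append_row(path: &Path, col1: &[u8], col2: &[u8]) -> Result<NippyJar<()>, NippyJarError> {
    let jar = NippyJar::load_without_header(path)?;
    // The writer takes ownership; opening it also runs the consistency check.
    let mut writer = NippyJarWriter::new(jar)?;
    writer.append_column(Some(Ok(col1)))?;
    writer.append_column(Some(Ok(col2)))?;
    writer.commit()?;
    // Recover the (updated) jar for the caller.
    Ok(writer.into_jar())
}
```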
+ pub fn new(mut jar: NippyJar) -> Result { let (data_file, offsets_file, is_created) = Self::create_or_open_files(jar.data_path(), &jar.offsets_path())?; + // Makes sure we don't have dangling data and offset files + jar.freeze_config()?; + let mut writer = Self { jar, - data_file, - offsets_file, + data_file: BufWriter::new(data_file), + offsets_file: BufWriter::new(offsets_file), tmp_buf: Vec::with_capacity(1_000_000), uncompressed_row_size: 0, offsets: Vec::with_capacity(1_000_000), @@ -66,35 +64,56 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { // changes if necessary. if !is_created { writer.check_consistency_and_heal()?; + writer.commit()?; } Ok(writer) } + /// Returns a reference to `H` of [`NippyJar`] + pub fn user_header(&self) -> &H { + &self.jar.user_header + } + + /// Returns a mutable reference to `H` of [`NippyJar`] + pub fn user_header_mut(&mut self) -> &mut H { + &mut self.jar.user_header + } + + /// Gets total written rows in jar. + pub fn rows(&self) -> usize { + self.jar.rows() + } + + /// Consumes the writer and returns the associated [`NippyJar`]. + pub fn into_jar(self) -> NippyJar { + self.jar + } + fn create_or_open_files( data: &Path, offsets: &Path, ) -> Result<(File, File, bool), NippyJarError> { let is_created = !data.exists() || !offsets.exists(); - let mut data_file = if !data.exists() { - File::create(data)? - } else { - OpenOptions::new().read(true).write(true).open(data)? - }; + if !data.exists() { + // File::create is write-only (no reading possible) + File::create(data)?; + } + + let mut data_file = OpenOptions::new().read(true).write(true).open(data)?; data_file.seek(SeekFrom::End(0))?; - let mut offsets_file = if !offsets.exists() { - let mut offsets = File::create(offsets)?; + if !offsets.exists() { + // File::create is write-only (no reading possible) + File::create(offsets)?; + } - // First byte of the offset file is the size of one offset in bytes - offsets.write_all(&[OFFSET_SIZE_BYTES as u8])?; - offsets.sync_all()?; + let mut offsets_file = OpenOptions::new().read(true).write(true).open(offsets)?; - offsets - } else { - OpenOptions::new().read(true).write(true).open(offsets)?
- }; + // First byte of the offset file is the size of one offset in bytes + offsets_file.write_all(&[OFFSET_SIZE_BYTES as u8])?; + offsets_file.sync_all()?; offsets_file.seek(SeekFrom::End(0))?; Ok((data_file, offsets_file, is_created)) @@ -118,7 +137,7 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { let expected_offsets_file_size = 1 + // first byte is the size of one offset OFFSET_SIZE_BYTES * self.jar.rows as u64 * self.jar.columns as u64 + // `offset size * num rows * num columns` OFFSET_SIZE_BYTES; // expected size of the data file - let actual_offsets_file_size = self.offsets_file.metadata()?.len(); + let actual_offsets_file_size = self.offsets_file.get_ref().metadata()?.len(); // Offsets configuration wasn't properly committed match expected_offsets_file_size.cmp(&actual_offsets_file_size) { @@ -126,7 +145,7 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { // Happened during an appending job // TODO: ideally we could truncate until the last offset of the last column of the // last row inserted - self.offsets_file.set_len(expected_offsets_file_size)?; + self.offsets_file.get_mut().set_len(expected_offsets_file_size)?; } Ordering::Greater => { // Happened during a pruning job @@ -145,14 +164,14 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { // last offset should match the data_file_len let last_offset = reader.reverse_offset(0)?; - let data_file_len = self.data_file.metadata()?.len(); + let data_file_len = self.data_file.get_ref().metadata()?.len(); // Offset list wasn't properly committed match last_offset.cmp(&data_file_len) { Ordering::Less => { // Happened during an appending job, so we need to truncate the data, since there's // no way to recover it. - self.data_file.set_len(last_offset)?; + self.data_file.get_mut().set_len(last_offset)?; } Ordering::Greater => { // Happened during a pruning job, so we need to reverse iterate offsets until we @@ -160,12 +179,13 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { for index in 0..reader.offsets_count()? { let offset = reader.reverse_offset(index + 1)?; if offset == data_file_len { - self.offsets_file.set_len( - self.offsets_file - .metadata()? - .len() - .saturating_sub(OFFSET_SIZE_BYTES * (index as u64 + 1)), - )?; + let new_len = self + .offsets_file + .get_ref() + .metadata()? + .len() + .saturating_sub(OFFSET_SIZE_BYTES * (index as u64 + 1)); + self.offsets_file.get_mut().set_len(new_len)?; drop(reader); @@ -229,11 +249,11 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { self.offsets.push(self.data_file.stream_position()?); } - self.write_column(value.as_ref())?; + let written = self.write_column(value.as_ref())?; // Last offset represents the size of the data file if no more data is to be // appended. Otherwise, represents the offset of the next data item. - self.offsets.push(self.data_file.stream_position()?); + self.offsets.push(self.offsets.last().expect("qed") + written as u64); } None => { return Err(NippyJarError::UnexpectedMissingValue( @@ -248,15 +268,17 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { } /// Writes column to data file. 
If it's the last column of the row, call `finalize_row()` - fn write_column(&mut self, value: &[u8]) -> Result<(), NippyJarError> { + fn write_column(&mut self, value: &[u8]) -> Result { self.uncompressed_row_size += value.len(); - if let Some(compression) = &self.jar.compressor { + let len = if let Some(compression) = &self.jar.compressor { let before = self.tmp_buf.len(); let len = compression.compress_to(value, &mut self.tmp_buf)?; self.data_file.write_all(&self.tmp_buf[before..before + len])?; + len } else { self.data_file.write_all(value)?; - } + value.len() + }; self.column += 1; @@ -264,11 +286,14 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { self.finalize_row(); } - Ok(()) + Ok(len) } /// Prunes rows from data and offsets file and updates its configuration on disk pub fn prune_rows(&mut self, num_rows: usize) -> Result<(), NippyJarError> { + self.offsets_file.flush()?; + self.data_file.flush()?; + // Each column of a row is one offset let num_offsets = num_rows * self.jar.columns; @@ -283,13 +308,13 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { self.offsets.truncate(self.offsets.len() - offsets_prune_count); // Truncate the data file to the new length - self.data_file.set_len(new_len)?; + self.data_file.get_mut().set_len(new_len)?; } // Prune from on-disk offset list if there are still rows left to prune if remaining_to_prune > 0 { // Get the current length of the on-disk offset file - let length = self.offsets_file.metadata()?.len(); + let length = self.offsets_file.get_ref().metadata()?.len(); // Handle non-empty offset file if length > 1 { @@ -308,8 +333,8 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { // If all rows are to be pruned if new_num_offsets <= 1 { // <= 1 because the one offset would actually be the expected file data size - self.offsets_file.set_len(1)?; - self.data_file.set_len(0)?; + self.offsets_file.get_mut().set_len(1)?; + self.data_file.get_mut().set_len(0)?; } else { // Calculate the new length for the on-disk offset list let new_len = 1 + new_num_offsets * OFFSET_SIZE_BYTES; @@ -318,20 +343,20 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { .seek(SeekFrom::Start(new_len.saturating_sub(OFFSET_SIZE_BYTES)))?; // Read the last offset value let mut last_offset = [0u8; OFFSET_SIZE_BYTES as usize]; - self.offsets_file.read_exact(&mut last_offset)?; + self.offsets_file.get_ref().read_exact(&mut last_offset)?; let last_offset = u64::from_le_bytes(last_offset); // Update the lengths of both the offsets and data files - self.offsets_file.set_len(new_len)?; - self.data_file.set_len(last_offset)?; + self.offsets_file.get_mut().set_len(new_len)?; + self.data_file.get_mut().set_len(last_offset)?; } } else { return Err(NippyJarError::InvalidPruning(0, remaining_to_prune as u64)) } } - self.offsets_file.sync_all()?; - self.data_file.sync_all()?; + self.offsets_file.get_ref().sync_all()?; + self.data_file.get_ref().sync_all()?; self.offsets_file.seek(SeekFrom::End(0))?; self.data_file.seek(SeekFrom::End(0))?; @@ -358,7 +383,8 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { /// Commits configuration and offsets to disk. It drains the internal offset list. pub fn commit(&mut self) -> Result<(), NippyJarError> { - self.data_file.sync_all()?; + self.data_file.flush()?; + self.data_file.get_ref().sync_all()?; self.commit_offsets()?; @@ -374,11 +400,11 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { // `append_column()` works alongside commit. So we need to skip it. 
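Because the handles are now `BufWriter`-wrapped, durability takes two steps: flush the userspace buffer, then fsync the inner `File` via `get_ref()`, which is the pattern `commit`, `commit_offsets` and `prune_rows` follow above. A standalone sketch using only the standard library:

```rust
use std::fs::File;
use std::io::{self, BufWriter, Write};

fn persist(buf: &mut BufWriter<File>, bytes: &[u8]) -> io::Result<()> {
    buf.write_all(bytes)?;
    // Move any buffered bytes into the OS page cache...
    buf.flush()?;
    // ...then ask the OS to push them to disk. `get_ref` reaches the inner File.
    buf.get_ref().sync_all()
}
```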
let mut last_offset_ondisk = None; - if self.offsets_file.metadata()?.len() > 1 { + if self.offsets_file.get_ref().metadata()?.len() > 1 { self.offsets_file.seek(SeekFrom::End(-(OFFSET_SIZE_BYTES as i64)))?; let mut buf = [0u8; OFFSET_SIZE_BYTES as usize]; - self.offsets_file.read_exact(&mut buf)?; + self.offsets_file.get_ref().read_exact(&mut buf)?; last_offset_ondisk = Some(u64::from_le_bytes(buf)); } @@ -393,11 +419,17 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { } self.offsets_file.write_all(&offset.to_le_bytes())?; } - self.offsets_file.sync_all()?; + self.offsets_file.flush()?; + self.offsets_file.get_ref().sync_all()?; Ok(()) } + #[cfg(test)] + pub fn max_row_size(&self) -> usize { + self.jar.max_row_size + } + #[cfg(test)] pub fn column(&self) -> usize { self.column @@ -412,4 +444,14 @@ impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> { pub fn offsets_mut(&mut self) -> &mut Vec<u64> { &mut self.offsets } + + #[cfg(test)] + pub fn offsets_path(&self) -> std::path::PathBuf { + self.jar.offsets_path() + } + + #[cfg(test)] + pub fn data_path(&self) -> &Path { + self.jar.data_path() + } } diff --git a/crates/storage/provider/Cargo.toml b/crates/storage/provider/Cargo.toml index bc442a5d002f..8b11fe69f66d 100644 --- a/crates/storage/provider/Cargo.toml +++ b/crates/storage/provider/Cargo.toml @@ -18,6 +18,7 @@ reth-interfaces.workspace = true reth-db.workspace = true reth-trie.workspace = true reth-nippy-jar.workspace = true +reth-codecs.workspace = true reth-node-api.workspace = true revm.workspace = true diff --git a/crates/storage/provider/src/bundle_state/bundle_state_with_receipts.rs b/crates/storage/provider/src/bundle_state/bundle_state_with_receipts.rs index c87c6ede80aa..7b6e5f1fecc5 100644 --- a/crates/storage/provider/src/bundle_state/bundle_state_with_receipts.rs +++ b/crates/storage/provider/src/bundle_state/bundle_state_with_receipts.rs @@ -1,15 +1,15 @@ -use crate::{StateChanges, StateReverts}; +use crate::{providers::StaticFileProviderRWRefMut, StateChanges, StateReverts}; use reth_db::{ cursor::{DbCursorRO, DbCursorRW}, tables, transaction::{DbTx, DbTxMut}, }; -use reth_interfaces::db::DatabaseError; +use reth_interfaces::provider::{ProviderError, ProviderResult}; use reth_primitives::{ logs_bloom, revm::compat::{into_reth_acc, into_revm_acc}, - Account, Address, BlockNumber, Bloom, Bytecode, Log, Receipt, Receipts, StorageEntry, B256, - U256, + Account, Address, BlockNumber, Bloom, Bytecode, Log, Receipt, Receipts, StaticFileSegment, + StorageEntry, B256, U256, }; use reth_trie::HashedPostState; use revm::{ @@ -285,15 +285,21 @@ impl BundleStateWithReceipts { std::mem::swap(&mut self.bundle, &mut other) } - /// Write the [BundleStateWithReceipts] to the database. + /// Write the [BundleStateWithReceipts] to the database and receipts to either the database + /// or static files if `static_file_producer` is `Some`. It should be `None` if there is any + /// kind of pruning/filtering over the receipts. /// - /// `is_value_known` should be set to `Not` if the [BundleStateWithReceipts] has some of its - /// state detached, This would make some original values not known. - pub fn write_to_db( + /// `is_value_known` should be set to `No` if the bundle has some of its data + /// detached, as this would make some original values not known.
+ pub fn write_to_storage( self, tx: &TX, + mut static_file_producer: Option>, is_value_known: OriginalValuesKnown, - ) -> Result<(), DatabaseError> { + ) -> ProviderResult<()> + where + TX: DbTxMut + DbTx, + { let (plain_state, reverts) = self.bundle.into_plain_state_and_reverts(is_value_known); StateReverts(reverts).write_to_db(tx, self.first_block)?; @@ -303,15 +309,22 @@ impl BundleStateWithReceipts { let mut receipts_cursor = tx.cursor_write::()?; for (idx, receipts) in self.receipts.into_iter().enumerate() { - if !receipts.is_empty() { - let block_number = self.first_block + idx as u64; - let (_, body_indices) = - bodies_cursor.seek_exact(block_number)?.unwrap_or_else(|| { - let last_available = bodies_cursor.last().ok().flatten().map(|(number, _)| number); - panic!("body indices for block {block_number} must exist. last available block number: {last_available:?}"); - }); - - let first_tx_index = body_indices.first_tx_num(); + let block_number = self.first_block + idx as u64; + let first_tx_index = bodies_cursor + .seek_exact(block_number)? + .map(|(_, indices)| indices.first_tx_num()) + .ok_or_else(|| ProviderError::BlockBodyIndicesNotFound(block_number))?; + + if let Some(static_file_producer) = &mut static_file_producer { + // Increment block on static file header. + static_file_producer.increment_block(StaticFileSegment::Receipts)?; + + for (tx_idx, receipt) in receipts.into_iter().enumerate() { + let receipt = receipt + .expect("receipt should not be filtered when saving to static files."); + static_file_producer.append_receipt(first_tx_index + tx_idx as u64, receipt)?; + } + } else if !receipts.is_empty() { for (tx_idx, receipt) in receipts.into_iter().enumerate() { if let Some(receipt) = receipt { receipts_cursor.append(first_tx_index + tx_idx as u64, receipt)?; @@ -426,7 +439,7 @@ mod tests { // Check change set let mut changeset_cursor = provider .tx_ref() - .cursor_dup_read::() + .cursor_dup_read::() .expect("Could not open changeset cursor"); assert_eq!( changeset_cursor.seek_exact(1).expect("Could not read account change set"), @@ -549,7 +562,7 @@ mod tests { state.merge_transitions(BundleRetention::Reverts); BundleStateWithReceipts::new(state.take_bundle(), Receipts::new(), 1) - .write_to_db(provider.tx_ref(), OriginalValuesKnown::Yes) + .write_to_storage(provider.tx_ref(), None, OriginalValuesKnown::Yes) .expect("Could not write bundle state to DB"); // Check plain storage state @@ -594,7 +607,7 @@ mod tests { // Check change set let mut changeset_cursor = provider .tx_ref() - .cursor_dup_read::() + .cursor_dup_read::() .expect("Could not open storage changeset cursor"); assert_eq!( changeset_cursor.seek_exact(BlockNumberAddress((1, address_a))).unwrap(), @@ -647,7 +660,7 @@ mod tests { state.merge_transitions(BundleRetention::Reverts); BundleStateWithReceipts::new(state.take_bundle(), Receipts::new(), 2) - .write_to_db(provider.tx_ref(), OriginalValuesKnown::Yes) + .write_to_storage(provider.tx_ref(), None, OriginalValuesKnown::Yes) .expect("Could not write bundle state to DB"); assert_eq!( @@ -711,7 +724,7 @@ mod tests { )])); init_state.merge_transitions(BundleRetention::Reverts); BundleStateWithReceipts::new(init_state.take_bundle(), Receipts::new(), 0) - .write_to_db(provider.tx_ref(), OriginalValuesKnown::Yes) + .write_to_storage(provider.tx_ref(), None, OriginalValuesKnown::Yes) .expect("Could not write init bundle state to DB"); let mut state = State::builder().with_bundle_update().build(); @@ -856,12 +869,12 @@ mod tests { let bundle = state.take_bundle(); 
BundleStateWithReceipts::new(bundle, Receipts::new(), 1) - .write_to_db(provider.tx_ref(), OriginalValuesKnown::Yes) + .write_to_storage(provider.tx_ref(), None, OriginalValuesKnown::Yes) .expect("Could not write bundle state to DB"); let mut storage_changeset_cursor = provider .tx_ref() - .cursor_dup_read::() + .cursor_dup_read::() .expect("Could not open plain storage state cursor"); let mut storage_changes = storage_changeset_cursor.walk_range(..).unwrap(); @@ -1019,7 +1032,7 @@ mod tests { )])); init_state.merge_transitions(BundleRetention::Reverts); BundleStateWithReceipts::new(init_state.take_bundle(), Receipts::new(), 0) - .write_to_db(provider.tx_ref(), OriginalValuesKnown::Yes) + .write_to_storage(provider.tx_ref(), None, OriginalValuesKnown::Yes) .expect("Could not write init bundle state to DB"); let mut state = State::builder().with_bundle_update().build(); @@ -1064,12 +1077,12 @@ mod tests { // Commit block #1 changes to the database. state.merge_transitions(BundleRetention::Reverts); BundleStateWithReceipts::new(state.take_bundle(), Receipts::new(), 1) - .write_to_db(provider.tx_ref(), OriginalValuesKnown::Yes) + .write_to_storage(provider.tx_ref(), None, OriginalValuesKnown::Yes) .expect("Could not write bundle state to DB"); let mut storage_changeset_cursor = provider .tx_ref() - .cursor_dup_read::() + .cursor_dup_read::() .expect("Could not open plain storage state cursor"); let range = BlockNumberAddress::range(1..=1); let mut storage_changes = storage_changeset_cursor.walk_range(range).unwrap(); @@ -1138,9 +1151,9 @@ mod tests { db.update(|tx| { for (address, (account, storage)) in prestate.iter() { let hashed_address = keccak256(address); - tx.put::(hashed_address, *account).unwrap(); + tx.put::(hashed_address, *account).unwrap(); for (slot, value) in storage { - tx.put::( + tx.put::( hashed_address, StorageEntry { key: keccak256(slot), value: *value }, ) diff --git a/crates/storage/provider/src/bundle_state/hashed_state_changes.rs b/crates/storage/provider/src/bundle_state/hashed_state_changes.rs index 0c14ec49f535..0ddccd8e71be 100644 --- a/crates/storage/provider/src/bundle_state/hashed_state_changes.rs +++ b/crates/storage/provider/src/bundle_state/hashed_state_changes.rs @@ -17,7 +17,7 @@ impl HashedStateChanges { pub fn write_to_db(self, tx: &TX) -> Result<(), DatabaseError> { // Write hashed account updates. let sorted_accounts = self.0.accounts.into_iter().sorted_unstable_by_key(|(key, _)| *key); - let mut hashed_accounts_cursor = tx.cursor_write::()?; + let mut hashed_accounts_cursor = tx.cursor_write::()?; for (hashed_address, account) in sorted_accounts { if let Some(account) = account { hashed_accounts_cursor.upsert(hashed_address, account)?; @@ -28,7 +28,7 @@ impl HashedStateChanges { // Write hashed storage changes. 
let sorted_storages = self.0.storages.into_iter().sorted_by_key(|(key, _)| *key); - let mut hashed_storage_cursor = tx.cursor_dup_write::()?; + let mut hashed_storage_cursor = tx.cursor_dup_write::()?; for (hashed_address, storage) in sorted_storages { if storage.wiped && hashed_storage_cursor.seek_exact(hashed_address)?.is_some() { hashed_storage_cursor.delete_current_duplicates()?; @@ -74,9 +74,9 @@ mod tests { { let provider_rw = provider_factory.provider_rw().unwrap(); let mut accounts_cursor = - provider_rw.tx_ref().cursor_write::().unwrap(); + provider_rw.tx_ref().cursor_write::().unwrap(); let mut storage_cursor = - provider_rw.tx_ref().cursor_write::().unwrap(); + provider_rw.tx_ref().cursor_write::().unwrap(); for address in addresses { let hashed_address = keccak256(address); @@ -100,13 +100,13 @@ mod tests { let provider = provider_factory.provider().unwrap(); assert_eq!( - provider.tx_ref().get::(destroyed_address_hashed), + provider.tx_ref().get::(destroyed_address_hashed), Ok(None) ); assert_eq!( provider .tx_ref() - .cursor_read::() + .cursor_read::() .unwrap() .seek_by_key_subkey(destroyed_address_hashed, hashed_slot), Ok(None) diff --git a/crates/storage/provider/src/bundle_state/state_reverts.rs b/crates/storage/provider/src/bundle_state/state_reverts.rs index 8b7d5c7c283e..87f87403169c 100644 --- a/crates/storage/provider/src/bundle_state/state_reverts.rs +++ b/crates/storage/provider/src/bundle_state/state_reverts.rs @@ -32,7 +32,7 @@ impl StateReverts { // Write storage changes tracing::trace!(target: "provider::reverts", "Writing storage changes"); let mut storages_cursor = tx.cursor_dup_write::()?; - let mut storage_changeset_cursor = tx.cursor_dup_write::()?; + let mut storage_changeset_cursor = tx.cursor_dup_write::()?; for (block_index, mut storage_changes) in self.0.storage.into_iter().enumerate() { let block_number = first_block + block_index as BlockNumber; @@ -73,7 +73,7 @@ impl StateReverts { // Write account changes tracing::trace!(target: "provider::reverts", "Writing account changes"); - let mut account_changeset_cursor = tx.cursor_dup_write::()?; + let mut account_changeset_cursor = tx.cursor_dup_write::()?; for (block_index, mut account_block_reverts) in self.0.accounts.into_iter().enumerate() { let block_number = first_block + block_index as BlockNumber; // Sort accounts by address. 
diff --git a/crates/storage/provider/src/providers/database/metrics.rs b/crates/storage/provider/src/providers/database/metrics.rs index 8fb3a1cd3b07..c103ae5f6d05 100644 --- a/crates/storage/provider/src/providers/database/metrics.rs +++ b/crates/storage/provider/src/providers/database/metrics.rs @@ -50,14 +50,14 @@ pub(crate) enum Action { InsertCanonicalHeaders, InsertHeaders, InsertHeaderNumbers, - InsertHeaderTD, + InsertHeaderTerminalDifficulties, InsertBlockOmmers, - InsertTxSenders, + InsertTransactionSenders, InsertTransactions, - InsertTxHashNumbers, + InsertTransactionHashNumbers, InsertBlockWithdrawals, InsertBlockBodyIndices, - InsertTransactionBlock, + InsertTransactionBlocks, GetNextTxNum, GetParentTD, @@ -77,14 +77,14 @@ impl Action { Action::InsertCanonicalHeaders => "insert canonical headers", Action::InsertHeaders => "insert headers", Action::InsertHeaderNumbers => "insert header numbers", - Action::InsertHeaderTD => "insert header TD", + Action::InsertHeaderTerminalDifficulties => "insert header TD", Action::InsertBlockOmmers => "insert block ommers", - Action::InsertTxSenders => "insert tx senders", + Action::InsertTransactionSenders => "insert tx senders", Action::InsertTransactions => "insert transactions", - Action::InsertTxHashNumbers => "insert tx hash numbers", + Action::InsertTransactionHashNumbers => "insert transaction hash numbers", Action::InsertBlockWithdrawals => "insert block withdrawals", Action::InsertBlockBodyIndices => "insert block body indices", - Action::InsertTransactionBlock => "insert transaction block", + Action::InsertTransactionBlocks => "insert transaction blocks", Action::GetNextTxNum => "get next tx num", Action::GetParentTD => "get parent TD", } diff --git a/crates/storage/provider/src/providers/database/mod.rs b/crates/storage/provider/src/providers/database/mod.rs index 4c335df222d2..6752a7d8a4be 100644 --- a/crates/storage/provider/src/providers/database/mod.rs +++ b/crates/storage/provider/src/providers/database/mod.rs @@ -1,8 +1,9 @@ use crate::{ providers::{ state::{historical::HistoricalStateProvider, latest::LatestStateProvider}, - SnapshotProvider, + StaticFileProvider, }, + to_range, traits::{BlockSource, ReceiptProvider}, BlockHashReader, BlockNumReader, BlockReader, ChainSpecProvider, EvmEnvProvider, HeaderProvider, HeaderSyncGap, HeaderSyncGapProvider, HeaderSyncMode, ProviderError, @@ -13,12 +14,11 @@ use reth_db::{database::Database, init_db, models::StoredBlockBodyIndices, Datab use reth_interfaces::{provider::ProviderResult, RethError, RethResult}; use reth_node_api::ConfigureEvmEnv; use reth_primitives::{ - snapshot::HighestSnapshots, stage::{StageCheckpoint, StageId}, Address, Block, BlockHash, BlockHashOrNumber, BlockNumber, BlockWithSenders, ChainInfo, ChainSpec, Header, PruneCheckpoint, PruneSegment, Receipt, SealedBlock, SealedBlockWithSenders, - SealedHeader, TransactionMeta, TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, - Withdrawal, Withdrawals, B256, U256, + SealedHeader, StaticFileSegment, TransactionMeta, TransactionSigned, TransactionSignedNoHash, + TxHash, TxNumber, Withdrawal, Withdrawals, B256, U256, }; use revm::primitives::{BlockEnv, CfgEnvWithHandlerCfg}; use std::{ @@ -26,7 +26,6 @@ use std::{ path::{Path, PathBuf}, sync::Arc, }; -use tokio::sync::watch; use tracing::trace; mod metrics; @@ -38,49 +37,51 @@ use reth_db::mdbx::DatabaseArguments; /// A common provider that fetches data from a database. /// /// This provider implements most provider or provider factory traits. 
-#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ProviderFactory { /// Database db: DB, /// Chain spec chain_spec: Arc, - /// Snapshot Provider - snapshot_provider: Option>, -} - -impl Clone for ProviderFactory { - fn clone(&self) -> Self { - Self { - db: self.db.clone(), - chain_spec: Arc::clone(&self.chain_spec), - snapshot_provider: self.snapshot_provider.clone(), - } - } + /// Static File Provider + static_file_provider: StaticFileProvider, } impl ProviderFactory { /// Create new database provider factory. - pub fn new(db: DB, chain_spec: Arc) -> Self { - Self { db, chain_spec, snapshot_provider: None } + pub fn new( + db: DB, + chain_spec: Arc, + static_files_path: PathBuf, + ) -> RethResult> { + Ok(Self { + db, + chain_spec, + static_file_provider: StaticFileProvider::new(static_files_path)?, + }) } - /// Database provider that comes with a shared snapshot provider. - pub fn with_snapshots( - mut self, - snapshots_path: PathBuf, - highest_snapshot_tracker: watch::Receiver>, - ) -> ProviderResult { - self.snapshot_provider = Some(Arc::new( - SnapshotProvider::new(snapshots_path)? - .with_highest_tracker(Some(highest_snapshot_tracker)), - )); - Ok(self) + /// Enables metrics on the static file provider. + pub fn with_static_files_metrics(mut self) -> Self { + self.static_file_provider = self.static_file_provider.with_metrics(); + self } /// Returns reference to the underlying database. pub fn db_ref(&self) -> &DB { &self.db } + + /// Returns static file provider + pub fn static_file_provider(&self) -> StaticFileProvider { + self.static_file_provider.clone() + } + + #[cfg(any(test, feature = "test-utils"))] + /// Consumes Self and returns DB + pub fn into_db(self) -> DB { + self.db + } } impl ProviderFactory { @@ -90,11 +91,12 @@ impl ProviderFactory { path: P, chain_spec: Arc, args: DatabaseArguments, + static_files_path: PathBuf, ) -> RethResult { Ok(ProviderFactory:: { db: init_db(path, args).map_err(|e| RethError::Custom(e.to_string()))?, chain_spec, - snapshot_provider: None, + static_file_provider: StaticFileProvider::new(static_files_path)?, }) } } @@ -105,13 +107,11 @@ impl ProviderFactory { /// [`BlockHashReader`]. This may fail if the inner read database transaction fails to open. #[track_caller] pub fn provider(&self) -> ProviderResult> { - let mut provider = DatabaseProvider::new(self.db.tx()?, self.chain_spec.clone()); - - if let Some(snapshot_provider) = &self.snapshot_provider { - provider = provider.with_snapshot_provider(snapshot_provider.clone()); - } - - Ok(provider) + Ok(DatabaseProvider::new( + self.db.tx()?, + self.chain_spec.clone(), + self.static_file_provider.clone(), + )) } /// Returns a provider with a created `DbTxMut` inside, which allows fetching and updating @@ -120,20 +120,18 @@ impl ProviderFactory { /// open. 
#[track_caller] pub fn provider_rw(&self) -> ProviderResult> { - let mut provider = DatabaseProvider::new_rw(self.db.tx_mut()?, self.chain_spec.clone()); - - if let Some(snapshot_provider) = &self.snapshot_provider { - provider = provider.with_snapshot_provider(snapshot_provider.clone()); - } - - Ok(DatabaseProviderRW(provider)) + Ok(DatabaseProviderRW(DatabaseProvider::new_rw( + self.db.tx_mut()?, + self.chain_spec.clone(), + self.static_file_provider.clone(), + ))) } /// Storage provider for latest block #[track_caller] pub fn latest(&self) -> ProviderResult { trace!(target: "providers::db", "Returning latest state provider"); - Ok(Box::new(LatestStateProvider::new(self.db.tx()?))) + Ok(Box::new(LatestStateProvider::new(self.db.tx()?, self.static_file_provider()))) } /// Storage provider for state at that given block @@ -145,7 +143,10 @@ impl ProviderFactory { if block_number == provider.best_block_number().unwrap_or_default() && block_number == provider.last_block_number().unwrap_or_default() { - return Ok(Box::new(LatestStateProvider::new(provider.into_tx()))) + return Ok(Box::new(LatestStateProvider::new( + provider.into_tx(), + self.static_file_provider(), + ))) } // +1 as the changeset that we want is the one that was applied after this block. @@ -156,7 +157,11 @@ impl ProviderFactory { let storage_history_prune_checkpoint = provider.get_prune_checkpoint(PruneSegment::StorageHistory)?; - let mut state_provider = HistoricalStateProvider::new(provider.into_tx(), block_number); + let mut state_provider = HistoricalStateProvider::new( + provider.into_tx(), + block_number, + self.static_file_provider(), + ); // If we pruned account or storage history, we can't return state on every historical block. // Instead, we should cap it at the latest prune checkpoint for corresponding prune segment. 
@@ -219,7 +224,12 @@ impl HeaderProvider for ProviderFactory { } fn header_by_number(&self, num: BlockNumber) -> ProviderResult> { - self.provider()?.header_by_number(num) + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, + num, + |static_file| static_file.header_by_number(num), + || self.provider()?.header_by_number(num), + ) } fn header_td(&self, hash: &BlockHash) -> ProviderResult> { @@ -227,22 +237,44 @@ impl HeaderProvider for ProviderFactory { } fn header_td_by_number(&self, number: BlockNumber) -> ProviderResult> { - self.provider()?.header_td_by_number(number) + if let Some(td) = self.chain_spec.final_paris_total_difficulty(number) { + // if this block is higher than the final paris(merge) block, return the final paris + // difficulty + return Ok(Some(td)) + } + + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, + number, + |static_file| static_file.header_td_by_number(number), + || self.provider()?.header_td_by_number(number), + ) } fn headers_range(&self, range: impl RangeBounds) -> ProviderResult> { - self.provider()?.headers_range(range) + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Headers, + to_range(range), + |static_file, range, _| static_file.headers_range(range), + |range, _| self.provider()?.headers_range(range), + |_| true, + ) } fn sealed_header(&self, number: BlockNumber) -> ProviderResult> { - self.provider()?.sealed_header(number) + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, + number, + |static_file| static_file.sealed_header(number), + || self.provider()?.sealed_header(number), + ) } fn sealed_headers_range( &self, range: impl RangeBounds, ) -> ProviderResult> { - self.provider()?.sealed_headers_range(range) + self.sealed_headers_while(range, |_| true) } fn sealed_headers_while( @@ -250,13 +282,24 @@ impl HeaderProvider for ProviderFactory { range: impl RangeBounds, predicate: impl FnMut(&SealedHeader) -> bool, ) -> ProviderResult> { - self.provider()?.sealed_headers_while(range, predicate) + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Headers, + to_range(range), + |static_file, range, predicate| static_file.sealed_headers_while(range, predicate), + |range, predicate| self.provider()?.sealed_headers_while(range, predicate), + predicate, + ) } } impl BlockHashReader for ProviderFactory { fn block_hash(&self, number: u64) -> ProviderResult> { - self.provider()?.block_hash(number) + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, + number, + |static_file| static_file.block_hash(number), + || self.provider()?.block_hash(number), + ) } fn canonical_hashes_range( @@ -264,7 +307,13 @@ impl BlockHashReader for ProviderFactory { start: BlockNumber, end: BlockNumber, ) -> ProviderResult> { - self.provider()?.canonical_hashes_range(start, end) + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Headers, + start..end, + |static_file, range, _| static_file.canonical_hashes_range(range.start, range.end), + |range, _| self.provider()?.canonical_hashes_range(range.start, range.end), + |_| true, + ) } } @@ -337,14 +386,24 @@ impl TransactionsProvider for ProviderFactory { } fn transaction_by_id(&self, id: TxNumber) -> ProviderResult> { - self.provider()?.transaction_by_id(id) + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Transactions, + id, + |static_file| 
static_file.transaction_by_id(id), + || self.provider()?.transaction_by_id(id), + ) } fn transaction_by_id_no_hash( &self, id: TxNumber, ) -> ProviderResult> { - self.provider()?.transaction_by_id_no_hash(id) + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Transactions, + id, + |static_file| static_file.transaction_by_id_no_hash(id), + || self.provider()?.transaction_by_id_no_hash(id), + ) } fn transaction_by_hash(&self, hash: TxHash) -> ProviderResult> { @@ -397,7 +456,12 @@ impl TransactionsProvider for ProviderFactory { impl ReceiptProvider for ProviderFactory { fn receipt(&self, id: TxNumber) -> ProviderResult> { - self.provider()?.receipt(id) + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Receipts, + id, + |static_file| static_file.receipt(id), + || self.provider()?.receipt(id), + ) } fn receipt_by_hash(&self, hash: TxHash) -> ProviderResult> { @@ -412,7 +476,13 @@ impl ReceiptProvider for ProviderFactory { &self, range: impl RangeBounds, ) -> ProviderResult> { - self.provider()?.receipts_by_tx_range(range) + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Receipts, + to_range(range), + |static_file, range, _| static_file.receipts_by_tx_range(range), + |range, _| self.provider()?.receipts_by_tx_range(range), + |_| true, + ) } } @@ -530,13 +600,16 @@ impl PruneCheckpointReader for ProviderFactory { mod tests { use super::ProviderFactory; use crate::{ - test_utils::create_test_provider_factory, BlockHashReader, BlockNumReader, BlockWriter, - HeaderSyncGapProvider, HeaderSyncMode, TransactionsProvider, + providers::StaticFileWriter, test_utils::create_test_provider_factory, BlockHashReader, + BlockNumReader, BlockWriter, HeaderSyncGapProvider, HeaderSyncMode, TransactionsProvider, }; use alloy_rlp::Decodable; use assert_matches::assert_matches; use rand::Rng; - use reth_db::{tables, test_utils::ERROR_TEMPDIR, transaction::DbTxMut}; + use reth_db::{ + tables, + test_utils::{create_test_static_files_dir, ERROR_TEMPDIR}, + }; use reth_interfaces::{ provider::ProviderError, test_utils::{ @@ -546,7 +619,8 @@ mod tests { RethError, }; use reth_primitives::{ - hex_literal::hex, ChainSpecBuilder, PruneMode, PruneModes, SealedBlock, TxNumber, B256, + hex_literal::hex, ChainSpecBuilder, PruneMode, PruneModes, SealedBlock, StaticFileSegment, + TxNumber, B256, U256, }; use std::{ops::RangeInclusive, sync::Arc}; use tokio::sync::watch; @@ -584,6 +658,7 @@ mod tests { tempfile::TempDir::new().expect(ERROR_TEMPDIR).into_path(), Arc::new(chain_spec), Default::default(), + create_test_static_files_dir(), ) .unwrap(); @@ -648,7 +723,7 @@ mod tests { Ok(_) ); - let senders = provider.get_or_take::(range.clone()); + let senders = provider.get_or_take::(range.clone()); assert_eq!( senders, Ok(range @@ -687,8 +762,6 @@ mod tests { // Genesis let checkpoint = 0; let head = random_header(&mut rng, 0, None); - let gap_fill = random_header(&mut rng, 1, Some(head.hash())); - let gap_tip = random_header(&mut rng, 2, Some(gap_fill.hash())); // Empty database assert_matches!( @@ -698,46 +771,14 @@ mod tests { ); // Checkpoint and no gap - provider - .tx_ref() - .put::(head.number, head.hash()) - .expect("failed to write canonical"); - provider - .tx_ref() - .put::(head.number, head.clone().unseal()) - .expect("failed to write header"); + let mut static_file_writer = + provider.static_file_provider().latest_writer(StaticFileSegment::Headers).unwrap(); + static_file_writer.append_header(head.header().clone(), 
U256::ZERO, head.hash()).unwrap(); + static_file_writer.commit().unwrap(); + drop(static_file_writer); let gap = provider.sync_gap(mode.clone(), checkpoint).unwrap(); assert_eq!(gap.local_head, head); assert_eq!(gap.target.tip(), consensus_tip.into()); - - // Checkpoint and gap - provider - .tx_ref() - .put::(gap_tip.number, gap_tip.hash()) - .expect("failed to write canonical"); - provider - .tx_ref() - .put::(gap_tip.number, gap_tip.clone().unseal()) - .expect("failed to write header"); - - let gap = provider.sync_gap(mode.clone(), checkpoint).unwrap(); - assert_eq!(gap.local_head, head); - assert_eq!(gap.target.tip(), gap_tip.parent_hash.into()); - - // Checkpoint and gap closed - provider - .tx_ref() - .put::(gap_fill.number, gap_fill.hash()) - .expect("failed to write canonical"); - provider - .tx_ref() - .put::(gap_fill.number, gap_fill.clone().unseal()) - .expect("failed to write header"); - - assert_matches!( - provider.sync_gap(mode, checkpoint), - Err(RethError::Provider(ProviderError::InconsistentHeaderGap)) - ); } } diff --git a/crates/storage/provider/src/providers/database/provider.rs b/crates/storage/provider/src/providers/database/provider.rs index cad3ab17ffcd..f9a2d37d0ea7 100644 --- a/crates/storage/provider/src/providers/database/provider.rs +++ b/crates/storage/provider/src/providers/database/provider.rs @@ -1,6 +1,6 @@ use crate::{ bundle_state::{BundleStateInit, BundleStateWithReceipts, HashedStateChanges, RevertsInit}, - providers::{database::metrics, SnapshotProvider}, + providers::{database::metrics, static_file::StaticFileWriter, StaticFileProvider}, to_range, traits::{ AccountExtReader, BlockSource, ChangeSetReader, ReceiptProvider, StageCheckpointWriter, @@ -8,7 +8,7 @@ use crate::{ AccountReader, BlockExecutionWriter, BlockHashReader, BlockNumReader, BlockReader, BlockWriter, Chain, EvmEnvProvider, HashingWriter, HeaderProvider, HeaderSyncGap, HeaderSyncGapProvider, HeaderSyncMode, HistoryWriter, OriginalValuesKnown, ProviderError, PruneCheckpointReader, - PruneCheckpointWriter, StageCheckpointReader, StorageReader, TransactionVariant, + PruneCheckpointWriter, StageCheckpointReader, StatsReader, StorageReader, TransactionVariant, TransactionsProvider, TransactionsProviderExt, WithdrawalsProvider, }; use itertools::{izip, Itertools}; @@ -28,7 +28,7 @@ use reth_db::{ use reth_interfaces::{ p2p::headers::downloader::SyncTarget, provider::{ProviderResult, RootMismatch}, - RethError, RethResult, + RethResult, }; use reth_node_api::ConfigureEvmEnv; use reth_primitives::{ @@ -38,7 +38,7 @@ use reth_primitives::{ trie::Nibbles, Account, Address, Block, BlockHash, BlockHashOrNumber, BlockNumber, BlockWithSenders, ChainInfo, ChainSpec, GotExpected, Hardfork, Head, Header, PruneCheckpoint, PruneModes, - PruneSegment, Receipt, SealedBlock, SealedBlockWithSenders, SealedHeader, SnapshotSegment, + PruneSegment, Receipt, SealedBlock, SealedBlockWithSenders, SealedHeader, StaticFileSegment, StorageEntry, TransactionMeta, TransactionSigned, TransactionSignedEcRecovered, TransactionSignedNoHash, TxHash, TxNumber, Withdrawal, Withdrawals, B256, U256, }; @@ -49,6 +49,7 @@ use reth_trie::{ }; use revm::primitives::{BlockEnv, CfgEnvWithHandlerCfg, SpecId}; use std::{ + cmp::Ordering, collections::{hash_map, BTreeMap, BTreeSet, HashMap, HashSet}, fmt::Debug, ops::{Bound, Deref, DerefMut, Range, RangeBounds, RangeInclusive}, @@ -82,7 +83,7 @@ impl DerefMut for DatabaseProviderRW { } impl DatabaseProviderRW { - /// Commit database transaction + /// Commit database transaction and 
static file if it exists. pub fn commit(self) -> ProviderResult { self.0.commit() } @@ -101,15 +102,25 @@ pub struct DatabaseProvider { tx: TX, /// Chain spec chain_spec: Arc, - /// Snapshot provider - #[allow(dead_code)] - snapshot_provider: Option>, + /// Static File provider + static_file_provider: StaticFileProvider, +} + +impl DatabaseProvider { + /// Returns a static file provider + pub fn static_file_provider(&self) -> &StaticFileProvider { + &self.static_file_provider + } } impl DatabaseProvider { /// Creates a provider with an inner read-write transaction. - pub fn new_rw(tx: TX, chain_spec: Arc) -> Self { - Self { tx, chain_spec, snapshot_provider: None } + pub fn new_rw( + tx: TX, + chain_spec: Arc, + static_file_provider: StaticFileProvider, + ) -> Self { + Self { tx, chain_spec, static_file_provider } } } @@ -153,6 +164,29 @@ impl DatabaseProvider { } } +impl DatabaseProvider { + #[cfg(any(test, feature = "test-utils"))] + /// Inserts an historical block. Used for setting up test environments + pub fn insert_historical_block( + &self, + block: SealedBlockWithSenders, + prune_modes: Option<&PruneModes>, + ) -> ProviderResult { + let ttd = if block.number == 0 { + block.difficulty + } else { + let parent_block_number = block.number - 1; + let parent_ttd = self.header_td_by_number(parent_block_number)?.unwrap_or_default(); + parent_ttd + block.difficulty + }; + + let mut writer = self.static_file_provider.latest_writer(StaticFileSegment::Headers)?; + writer.append_header(block.header.as_ref().clone(), ttd, block.hash())?; + + self.insert_block(block, prune_modes) + } +} + /// For a given key, unwind all history shards that are below the given block number. /// /// S - Sharded key subtype. @@ -170,7 +204,7 @@ fn unwind_history_shards( start_key: T::Key, block_number: BlockNumber, mut shard_belongs_to_key: impl FnMut(&T::Key) -> bool, -) -> ProviderResult> +) -> ProviderResult> where T: Table, T::Key: AsRef>, @@ -186,15 +220,15 @@ where // Check the first item. // If it is greater or eq to the block number, delete it. - let first = list.iter(0).next().expect("List can't be empty"); - if first >= block_number as usize { + let first = list.iter().next().expect("List can't be empty"); + if first >= block_number { item = cursor.prev()?; continue } else if block_number <= sharded_key.as_ref().highest_block_number { // Filter out all elements greater than block number. - return Ok(list.iter(0).take_while(|i| *i < block_number as usize).collect::>()) + return Ok(list.iter().take_while(|i| *i < block_number).collect::>()) } else { - return Ok(list.iter(0).collect::>()) + return Ok(list.iter().collect::>()) } } @@ -203,14 +237,12 @@ where impl DatabaseProvider { /// Creates a provider with an inner read-only transaction. - pub fn new(tx: TX, chain_spec: Arc) -> Self { - Self { tx, chain_spec, snapshot_provider: None } - } - - /// Creates a new [`Self`] with access to a [`SnapshotProvider`]. - pub fn with_snapshot_provider(mut self, snapshot_provider: Arc) -> Self { - self.snapshot_provider = Some(snapshot_provider); - self + pub fn new( + tx: TX, + chain_spec: Arc, + static_file_provider: StaticFileProvider, + ) -> Self { + Self { tx, chain_spec, static_file_provider } } /// Consume `DbTx` or `DbTxMut`. @@ -250,96 +282,6 @@ impl DatabaseProvider { self } - /// Gets data within a specified range, potentially spanning different snapshots and database. - /// - /// # Arguments - /// * `segment` - The segment of the snapshot to query. - /// * `block_range` - The range of data to fetch. 
- /// * `fetch_from_snapshot` - A function to fetch data from the snapshot. - /// * `fetch_from_database` - A function to fetch data from the database. - /// * `predicate` - A function used to evaluate each item in the fetched data. Fetching is - /// terminated when this function returns false, thereby filtering the data based on the - /// provided condition. - fn get_range_with_snapshot( - &self, - segment: SnapshotSegment, - mut block_or_tx_range: Range, - fetch_from_snapshot: FS, - mut fetch_from_database: FD, - mut predicate: P, - ) -> ProviderResult> - where - FS: Fn(&SnapshotProvider, Range, &mut P) -> ProviderResult>, - FD: FnMut(Range, P) -> ProviderResult>, - P: FnMut(&T) -> bool, - { - let mut data = Vec::new(); - - if let Some(snapshot_provider) = &self.snapshot_provider { - // If there is, check the maximum block or transaction number of the segment. - if let Some(snapshot_upper_bound) = match segment { - SnapshotSegment::Headers => snapshot_provider.get_highest_snapshot_block(segment), - SnapshotSegment::Transactions | SnapshotSegment::Receipts => { - snapshot_provider.get_highest_snapshot_tx(segment) - } - } { - if block_or_tx_range.start <= snapshot_upper_bound { - let end = block_or_tx_range.end.min(snapshot_upper_bound + 1); - data.extend(fetch_from_snapshot( - snapshot_provider, - block_or_tx_range.start..end, - &mut predicate, - )?); - block_or_tx_range.start = end; - } - } - } - - if block_or_tx_range.end > block_or_tx_range.start { - data.extend(fetch_from_database(block_or_tx_range, predicate)?) - } - - Ok(data) - } - - /// Retrieves data from the database or snapshot, wherever it's available. - /// - /// # Arguments - /// * `segment` - The segment of the snapshot to check against. - /// * `index_key` - Requested index key, usually a block or transaction number. - /// * `fetch_from_snapshot` - A closure that defines how to fetch the data from the snapshot - /// provider. - /// * `fetch_from_database` - A closure that defines how to fetch the data from the database - /// when the snapshot doesn't contain the required data or is not available. - fn get_with_snapshot( - &self, - segment: SnapshotSegment, - number: u64, - fetch_from_snapshot: FS, - fetch_from_database: FD, - ) -> ProviderResult> - where - FS: Fn(&SnapshotProvider) -> ProviderResult>, - FD: Fn() -> ProviderResult>, - { - if let Some(provider) = &self.snapshot_provider { - // If there is, check the maximum block or transaction number of the segment. - let snapshot_upper_bound = match segment { - SnapshotSegment::Headers => provider.get_highest_snapshot_block(segment), - SnapshotSegment::Transactions | SnapshotSegment::Receipts => { - provider.get_highest_snapshot_tx(segment) - } - }; - - if snapshot_upper_bound - .map_or(false, |snapshot_upper_bound| snapshot_upper_bound >= number) - { - return fetch_from_snapshot(provider) - } - } - fetch_from_database() - } - fn transactions_by_tx_range_with_cursor( &self, range: impl RangeBounds, @@ -348,10 +290,10 @@ impl DatabaseProvider { where C: DbCursorRO, { - self.get_range_with_snapshot( - SnapshotSegment::Transactions, + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Transactions, to_range(range), - |snapshot, range, _| snapshot.transactions_by_tx_range(range), + |static_file, range, _| static_file.transactions_by_tx_range(range), |range, _| self.cursor_collect(cursor, range), |_| true, ) @@ -375,9 +317,9 @@ impl DatabaseProvider { /// /// 1. 
Iterate over the [BlockBodyIndices][tables::BlockBodyIndices] table to get all /// the transaction ids. - /// 2. Iterate over the [StorageChangeSet][tables::StorageChangeSet] table - /// and the [AccountChangeSet][tables::AccountChangeSet] tables in reverse order to reconstruct - /// the changesets. + /// 2. Iterate over the [StorageChangeSets][tables::StorageChangeSets] table + /// and the [AccountChangeSets][tables::AccountChangeSets] tables in reverse order to + /// reconstruct the changesets. /// - In order to have both the old and new values in the changesets, we also access the /// plain state tables. /// 3. While iterating over the changeset tables, if we encounter a new account or storage slot, @@ -411,8 +353,8 @@ impl DatabaseProvider { let storage_range = BlockNumberAddress::range(range.clone()); let storage_changeset = - self.get_or_take::(storage_range)?; - let account_changeset = self.get_or_take::(range)?; + self.get_or_take::(storage_range)?; + let account_changeset = self.get_or_take::(range)?; // iterate previous value and get plain state value to create changeset // Double option around Account represent if Account state is know (first option) and @@ -592,8 +534,9 @@ impl DatabaseProvider { .map(|(id, tx)| (id, tx.into())) .collect::>(); - let mut senders = - self.get_or_take::(first_transaction..=last_transaction)?; + let mut senders = self.get_or_take::( + first_transaction..=last_transaction, + )?; // Recover senders manually if not found in db // NOTE: Transactions are always guaranteed to be in the database whereas @@ -655,18 +598,18 @@ impl DatabaseProvider { } if TAKE { - // Remove TxHashNumber - let mut tx_hash_cursor = self.tx.cursor_write::()?; + // Remove TransactionHashNumbers + let mut tx_hash_cursor = self.tx.cursor_write::()?; for (_, tx) in transactions.iter() { if tx_hash_cursor.seek_exact(tx.hash())?.is_some() { tx_hash_cursor.delete_current()?; } } - // Remove TransactionBlock index if there are transaction present + // Remove TransactionBlocks index if there are transaction present if !transactions.is_empty() { let tx_id_range = transactions.first().unwrap().0..=transactions.last().unwrap().0; - self.get_or_take::(tx_id_range)?; + self.get_or_take::(tx_id_range)?; } } @@ -723,8 +666,8 @@ impl DatabaseProvider { let block_tx = self.get_take_block_transaction_range::(range.clone())?; if TAKE { - // rm HeaderTD - self.get_or_take::(range)?; + // rm HeaderTerminalDifficulties + self.get_or_take::(range)?; // rm HeaderNumbers let mut header_number_cursor = self.tx.cursor_write::()?; for (_, hash) in block_header_hashes.iter() { @@ -914,7 +857,7 @@ impl DatabaseProvider { if let Some((shard_key, list)) = shard { // delete old shard so new one can be inserted. 
self.tx.delete::(shard_key, None)?; - let list = list.iter(0).map(|i| i as u64).collect::>(); + let list = list.iter().collect::>(); return Ok(list) } Ok(Vec::new()) @@ -943,13 +886,13 @@ impl DatabaseProvider { let chunks = indices .chunks(sharded_key::NUM_OF_INDICES_IN_SHARD) .into_iter() - .map(|chunks| chunks.map(|i| *i as usize).collect::>()) - .collect::>(); + .map(|chunks| chunks.copied().collect()) + .collect::>>(); let mut chunks = chunks.into_iter().peekable(); while let Some(list) = chunks.next() { let highest_block_number = if chunks.peek().is_some() { - *list.last().expect("`chunks` does not return empty list") as u64 + *list.last().expect("`chunks` does not return empty list") } else { // Insert last list with u64::MAX u64::MAX @@ -976,7 +919,7 @@ impl AccountExtReader for DatabaseProvider { range: impl RangeBounds, ) -> ProviderResult> { self.tx - .cursor_read::()? + .cursor_read::()? .walk_range(range)? .map(|entry| { entry.map(|(_, account_before)| account_before.address).map_err(Into::into) @@ -999,7 +942,7 @@ impl AccountExtReader for DatabaseProvider { &self, range: RangeInclusive, ) -> ProviderResult>> { - let mut changeset_cursor = self.tx.cursor_read::()?; + let mut changeset_cursor = self.tx.cursor_read::()?; let account_transitions = changeset_cursor.walk_range(range)?.try_fold( BTreeMap::new(), @@ -1021,7 +964,7 @@ impl ChangeSetReader for DatabaseProvider { ) -> ProviderResult> { let range = block_number..=block_number; self.tx - .cursor_read::()? + .cursor_read::()? .walk_range(range)? .map(|result| -> ProviderResult<_> { let (_, account_before) = result?; @@ -1037,45 +980,38 @@ impl HeaderSyncGapProvider for DatabaseProvider { mode: HeaderSyncMode, highest_uninterrupted_block: BlockNumber, ) -> RethResult { - // Create a cursor over canonical header hashes - let mut cursor = self.tx.cursor_read::()?; - let mut header_cursor = self.tx.cursor_read::()?; + let static_file_provider = self.static_file_provider(); + + // Make sure Headers static file is at the same height. If it's further, this + // input execution was interrupted previously and we need to unwind the static file. + let next_static_file_block_num = static_file_provider + .get_highest_static_file_block(StaticFileSegment::Headers) + .map(|id| id + 1) + .unwrap_or_default(); + let next_block = highest_uninterrupted_block + 1; + + match next_static_file_block_num.cmp(&next_block) { + // The node shutdown between an executed static file commit and before the database + // commit, so we need to unwind the static files. + Ordering::Greater => { + let mut static_file_producer = + static_file_provider.latest_writer(StaticFileSegment::Headers)?; + static_file_producer.prune_headers(next_static_file_block_num - next_block)? + } + Ordering::Less => { + // There's either missing or corrupted files. + return Err(ProviderError::HeaderNotFound(next_static_file_block_num.into()).into()) + } + Ordering::Equal => {} + } - // Get head hash and reposition the cursor - let (head_num, head_hash) = cursor - .seek_exact(highest_uninterrupted_block)? + let local_head = static_file_provider + .sealed_header(highest_uninterrupted_block)? .ok_or_else(|| ProviderError::HeaderNotFound(highest_uninterrupted_block.into()))?; - // Construct head - let (_, head) = header_cursor - .seek_exact(head_num)? - .ok_or_else(|| ProviderError::HeaderNotFound(head_num.into()))?; - let local_head = head.seal(head_hash); - - // Look up the next header - let next_header = cursor - .next()? 
- .map(|(next_num, next_hash)| -> Result { - let (_, next) = header_cursor - .seek_exact(next_num)? - .ok_or_else(|| ProviderError::HeaderNotFound(next_num.into()))?; - Ok(next.seal(next_hash)) - }) - .transpose()?; - - // Decide the tip or error out on invalid input. - // If the next element found in the cursor is not the "expected" next block per our current - // checkpoint, then there is a gap in the database and we should start downloading in - // reverse from there. Else, it should use whatever the forkchoice state reports. - let target = match next_header { - Some(header) if highest_uninterrupted_block + 1 != header.number => { - SyncTarget::Gap(header) - } - None => match mode { - HeaderSyncMode::Tip(rx) => SyncTarget::Tip(*rx.borrow()), - HeaderSyncMode::Continuous => SyncTarget::TipNum(head_num + 1), - }, - _ => return Err(ProviderError::InconsistentHeaderGap.into()), + let target = match mode { + HeaderSyncMode::Tip(rx) => SyncTarget::Tip(*rx.borrow()), + HeaderSyncMode::Continuous => SyncTarget::TipNum(highest_uninterrupted_block + 1), }; Ok(HeaderSyncGap { local_head, target }) @@ -1092,10 +1028,10 @@ impl HeaderProvider for DatabaseProvider { } fn header_by_number(&self, num: BlockNumber) -> ProviderResult> { - self.get_with_snapshot( - SnapshotSegment::Headers, + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, num, - |snapshot| snapshot.header_by_number(num), + |static_file| static_file.header_by_number(num), || Ok(self.tx.get::(num)?), ) } @@ -1115,29 +1051,29 @@ impl HeaderProvider for DatabaseProvider { return Ok(Some(td)) } - self.get_with_snapshot( - SnapshotSegment::Headers, + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, number, - |snapshot| snapshot.header_td_by_number(number), - || Ok(self.tx.get::(number)?.map(|td| td.0)), + |static_file| static_file.header_td_by_number(number), + || Ok(self.tx.get::(number)?.map(|td| td.0)), ) } fn headers_range(&self, range: impl RangeBounds) -> ProviderResult> { - self.get_range_with_snapshot( - SnapshotSegment::Headers, + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Headers, to_range(range), - |snapshot, range, _| snapshot.headers_range(range), + |static_file, range, _| static_file.headers_range(range), |range, _| self.cursor_read_collect::(range).map_err(Into::into), |_| true, ) } fn sealed_header(&self, number: BlockNumber) -> ProviderResult> { - self.get_with_snapshot( - SnapshotSegment::Headers, + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, number, - |snapshot| snapshot.sealed_header(number), + |static_file| static_file.sealed_header(number), || { if let Some(header) = self.header_by_number(number)? { let hash = self @@ -1156,10 +1092,10 @@ impl HeaderProvider for DatabaseProvider { range: impl RangeBounds, predicate: impl FnMut(&SealedHeader) -> bool, ) -> ProviderResult> { - self.get_range_with_snapshot( - SnapshotSegment::Headers, + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Headers, to_range(range), - |snapshot, range, predicate| snapshot.sealed_headers_while(range, predicate), + |static_file, range, predicate| static_file.sealed_headers_while(range, predicate), |range, mut predicate| { let mut headers = vec![]; for entry in self.tx.cursor_read::()?.walk_range(range)? 
{ @@ -1182,10 +1118,10 @@ impl HeaderProvider for DatabaseProvider { impl BlockHashReader for DatabaseProvider { fn block_hash(&self, number: u64) -> ProviderResult> { - self.get_with_snapshot( - SnapshotSegment::Headers, + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, number, - |snapshot| snapshot.block_hash(number), + |static_file| static_file.block_hash(number), || Ok(self.tx.get::(number)?), ) } @@ -1195,10 +1131,10 @@ impl BlockHashReader for DatabaseProvider { start: BlockNumber, end: BlockNumber, ) -> ProviderResult> { - self.get_range_with_snapshot( - SnapshotSegment::Headers, + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Headers, start..end, - |snapshot, range, _| snapshot.canonical_hashes_range(range.start, range.end), + |static_file, range, _| static_file.canonical_hashes_range(range.start, range.end), |range, _| { self.cursor_read_collect::(range).map_err(Into::into) }, @@ -1418,10 +1354,10 @@ impl TransactionsProviderExt for DatabaseProvider { &self, tx_range: Range, ) -> ProviderResult> { - self.get_range_with_snapshot( - SnapshotSegment::Transactions, + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Transactions, tx_range, - |snapshot, range, _| snapshot.transaction_hashes_by_range(range), + |static_file, range, _| static_file.transaction_hashes_by_range(range), |tx_range, _| { let mut tx_cursor = self.tx.cursor_read::()?; let tx_range_size = tx_range.clone().count(); @@ -1482,14 +1418,14 @@ impl TransactionsProviderExt for DatabaseProvider { impl TransactionsProvider for DatabaseProvider { fn transaction_id(&self, tx_hash: TxHash) -> ProviderResult> { - Ok(self.tx.get::(tx_hash)?) + Ok(self.tx.get::(tx_hash)?) } fn transaction_by_id(&self, id: TxNumber) -> ProviderResult> { - self.get_with_snapshot( - SnapshotSegment::Transactions, + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Transactions, id, - |snapshot| snapshot.transaction_by_id(id), + |static_file| static_file.transaction_by_id(id), || Ok(self.tx.get::(id)?.map(Into::into)), ) } @@ -1498,10 +1434,10 @@ impl TransactionsProvider for DatabaseProvider { &self, id: TxNumber, ) -> ProviderResult> { - self.get_with_snapshot( - SnapshotSegment::Transactions, + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Transactions, id, - |snapshot| snapshot.transaction_by_id_no_hash(id), + |static_file| static_file.transaction_by_id_no_hash(id), || Ok(self.tx.get::(id)?), ) } @@ -1523,7 +1459,7 @@ impl TransactionsProvider for DatabaseProvider { &self, tx_hash: TxHash, ) -> ProviderResult> { - let mut transaction_cursor = self.tx.cursor_read::()?; + let mut transaction_cursor = self.tx.cursor_read::()?; if let Some(transaction_id) = self.transaction_id(tx_hash)? { if let Some(tx) = self.transaction_by_id_no_hash(transaction_id)? 
{ let transaction = TransactionSigned { @@ -1563,7 +1499,7 @@ impl TransactionsProvider for DatabaseProvider { } fn transaction_block(&self, id: TxNumber) -> ProviderResult> { - let mut cursor = self.tx.cursor_read::()?; + let mut cursor = self.tx.cursor_read::()?; Ok(cursor.seek(id)?.map(|(_, bn)| bn)) } @@ -1629,20 +1565,20 @@ impl TransactionsProvider for DatabaseProvider { &self, range: impl RangeBounds, ) -> ProviderResult> { - self.cursor_read_collect::(range).map_err(Into::into) + self.cursor_read_collect::(range).map_err(Into::into) } fn transaction_sender(&self, id: TxNumber) -> ProviderResult> { - Ok(self.tx.get::(id)?) + Ok(self.tx.get::(id)?) } } impl ReceiptProvider for DatabaseProvider { fn receipt(&self, id: TxNumber) -> ProviderResult> { - self.get_with_snapshot( - SnapshotSegment::Receipts, + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Receipts, id, - |snapshot| snapshot.receipt(id), + |static_file| static_file.receipt(id), || Ok(self.tx.get::(id)?), ) } @@ -1673,10 +1609,10 @@ impl ReceiptProvider for DatabaseProvider { &self, range: impl RangeBounds, ) -> ProviderResult> { - self.get_range_with_snapshot( - SnapshotSegment::Receipts, + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Receipts, to_range(range), - |snapshot, range, _| snapshot.receipts_by_tx_range(range), + |static_file, range, _| static_file.receipts_by_tx_range(range), |range, _| self.cursor_read_collect::(range).map_err(Into::into), |_| true, ) @@ -1818,12 +1754,12 @@ impl EvmEnvProvider for DatabaseProvider { impl StageCheckpointReader for DatabaseProvider { fn get_stage_checkpoint(&self, id: StageId) -> ProviderResult> { - Ok(self.tx.get::(id.to_string())?) + Ok(self.tx.get::(id.to_string())?) } /// Get stage checkpoint progress. fn get_stage_checkpoint_progress(&self, id: StageId) -> ProviderResult>> { - Ok(self.tx.get::(id.to_string())?) + Ok(self.tx.get::(id.to_string())?) } } @@ -1834,7 +1770,7 @@ impl StageCheckpointWriter for DatabaseProvider { id: StageId, checkpoint: StageCheckpoint, ) -> ProviderResult<()> { - Ok(self.tx.put::(id.to_string(), checkpoint)?) + Ok(self.tx.put::(id.to_string(), checkpoint)?) } /// Save stage checkpoint progress. @@ -1843,7 +1779,7 @@ impl StageCheckpointWriter for DatabaseProvider { id: StageId, checkpoint: Vec, ) -> ProviderResult<()> { - Ok(self.tx.put::(id.to_string(), checkpoint)?) + Ok(self.tx.put::(id.to_string(), checkpoint)?) } fn update_pipeline_stages( @@ -1852,7 +1788,7 @@ impl StageCheckpointWriter for DatabaseProvider { drop_stage_checkpoint: bool, ) -> ProviderResult<()> { // iterate over all existing stages in the table and update its progress. - let mut cursor = self.tx.cursor_write::()?; + let mut cursor = self.tx.cursor_write::()?; for stage_id in StageId::ALL { let (_, checkpoint) = cursor.seek_exact(stage_id.to_string())?.unwrap_or_default(); cursor.upsert( @@ -1897,7 +1833,7 @@ impl StorageReader for DatabaseProvider { range: RangeInclusive, ) -> ProviderResult>> { self.tx - .cursor_read::()? + .cursor_read::()? .walk_range(BlockNumberAddress::range(range))? // fold all storages and save its old state so we can remove it from HashedStorage // it is needed as it is dup table. 
@@ -1912,7 +1848,7 @@ impl StorageReader for DatabaseProvider { &self, range: RangeInclusive, ) -> ProviderResult>> { - let mut changeset_cursor = self.tx.cursor_read::()?; + let mut changeset_cursor = self.tx.cursor_read::()?; let storage_changeset_lists = changeset_cursor.walk_range(BlockNumberAddress::range(range))?.try_fold( @@ -1941,7 +1877,7 @@ impl HashingWriter for DatabaseProvider { // changes are applied in the correct order. let hashed_accounts = self .tx - .cursor_read::()? + .cursor_read::()? .walk_range(range)? .map(|entry| entry.map(|(_, e)| (keccak256(e.address), e.info))) .collect::, _>>()? @@ -1950,7 +1886,7 @@ impl HashingWriter for DatabaseProvider { .collect::>(); // Apply values to HashedState, and remove the account if it's None. - let mut hashed_accounts_cursor = self.tx.cursor_write::()?; + let mut hashed_accounts_cursor = self.tx.cursor_write::()?; for (hashed_address, account) in &hashed_accounts { if let Some(account) = account { hashed_accounts_cursor.upsert(*hashed_address, *account)?; @@ -1966,7 +1902,7 @@ impl HashingWriter for DatabaseProvider { &self, accounts: impl IntoIterator)>, ) -> ProviderResult>> { - let mut hashed_accounts_cursor = self.tx.cursor_write::()?; + let mut hashed_accounts_cursor = self.tx.cursor_write::()?; let hashed_accounts = accounts.into_iter().map(|(ad, ac)| (keccak256(ad), ac)).collect::>(); for (hashed_address, account) in &hashed_accounts { @@ -1984,7 +1920,7 @@ impl HashingWriter for DatabaseProvider { range: Range, ) -> ProviderResult>> { // Aggregate all block changesets and make list of accounts that have been changed. - let mut changesets = self.tx.cursor_read::()?; + let mut changesets = self.tx.cursor_read::()?; let mut hashed_storages = changesets .walk_range(range)? .map(|entry| { @@ -1997,7 +1933,7 @@ impl HashingWriter for DatabaseProvider { // Apply values to HashedState, and remove the account if it's None. 
let mut hashed_storage_keys: HashMap> = HashMap::new(); - let mut hashed_storage = self.tx.cursor_dup_write::()?; + let mut hashed_storage = self.tx.cursor_dup_write::()?; for (hashed_address, key, value) in hashed_storages.into_iter().rev() { hashed_storage_keys.entry(hashed_address).or_default().insert(key); @@ -2036,7 +1972,7 @@ impl HashingWriter for DatabaseProvider { (*hashed_address, BTreeSet::from_iter(entries.keys().copied())) })); - let mut hashed_storage_cursor = self.tx.cursor_dup_write::()?; + let mut hashed_storage_cursor = self.tx.cursor_dup_write::()?; // Hash the address and key and apply them to HashedStorage (if Storage is None // just remove it); hashed_storages.into_iter().try_for_each(|(hashed_address, storage)| { @@ -2157,7 +2093,7 @@ impl HistoryWriter for DatabaseProvider { &self, storage_transitions: BTreeMap<(Address, B256), Vec>, ) -> ProviderResult<()> { - self.append_history_index::<_, tables::StorageHistory>( + self.append_history_index::<_, tables::StoragesHistory>( storage_transitions, |(address, storage_key), highest_block_number| { StorageShardedKey::new(address, storage_key, highest_block_number) @@ -2169,7 +2105,10 @@ impl HistoryWriter for DatabaseProvider { &self, account_transitions: BTreeMap>, ) -> ProviderResult<()> { - self.append_history_index::<_, tables::AccountHistory>(account_transitions, ShardedKey::new) + self.append_history_index::<_, tables::AccountsHistory>( + account_transitions, + ShardedKey::new, + ) } fn unwind_storage_history_indices( @@ -2178,7 +2117,7 @@ impl HistoryWriter for DatabaseProvider { ) -> ProviderResult { let mut storage_changesets = self .tx - .cursor_read::()? + .cursor_read::()? .walk_range(range)? .map(|entry| { entry.map(|(BlockNumberAddress((bn, address)), storage)| (address, storage.key, bn)) @@ -2186,9 +2125,9 @@ impl HistoryWriter for DatabaseProvider { .collect::, _>>()?; storage_changesets.sort_by_key(|(address, key, _)| (*address, *key)); - let mut cursor = self.tx.cursor_write::()?; + let mut cursor = self.tx.cursor_write::()?; for &(address, storage_key, rem_index) in &storage_changesets { - let partial_shard = unwind_history_shards::<_, tables::StorageHistory, _>( + let partial_shard = unwind_history_shards::<_, tables::StoragesHistory, _>( &mut cursor, StorageShardedKey::last(address, storage_key), rem_index, @@ -2218,16 +2157,16 @@ impl HistoryWriter for DatabaseProvider { ) -> ProviderResult { let mut last_indices = self .tx - .cursor_read::()? + .cursor_read::()? .walk_range(range)? .map(|entry| entry.map(|(index, account)| (account.address, index))) .collect::, _>>()?; last_indices.sort_by_key(|(a, _)| *a); // Unwind the account history index. - let mut cursor = self.tx.cursor_write::()?; + let mut cursor = self.tx.cursor_write::()?; for &(address, rem_index) in &last_indices { - let partial_shard = unwind_history_shards::<_, tables::AccountHistory, _>( + let partial_shard = unwind_history_shards::<_, tables::AccountsHistory, _>( &mut cursor, ShardedKey::last(address), rem_index, @@ -2375,8 +2314,8 @@ impl BlockWriter for DatabaseProvider { parent_ttd + block.difficulty }; - self.tx.put::(block_number, ttd.into())?; - durations_recorder.record_relative(metrics::Action::InsertHeaderTD); + self.tx.put::(block_number, ttd.into())?; + durations_recorder.record_relative(metrics::Action::InsertHeaderTerminalDifficulties); // insert body ommers data if !block.ommers.is_empty() { @@ -2389,7 +2328,7 @@ impl BlockWriter for DatabaseProvider { let mut next_tx_num = self .tx - .cursor_read::()? 
+ .cursor_read::()? .last()? .map(|(n, _)| n + 1) .unwrap_or_default(); @@ -2412,7 +2351,7 @@ impl BlockWriter for DatabaseProvider { .is_none() { let start = Instant::now(); - self.tx.put::(next_tx_num, *sender)?; + self.tx.put::(next_tx_num, *sender)?; tx_senders_elapsed += start.elapsed(); } @@ -2437,16 +2376,19 @@ impl BlockWriter for DatabaseProvider { .is_none() { let start = Instant::now(); - self.tx.put::(hash, next_tx_num)?; + self.tx.put::(hash, next_tx_num)?; tx_hash_numbers_elapsed += start.elapsed(); } next_tx_num += 1; } - durations_recorder.record_duration(metrics::Action::InsertTxSenders, tx_senders_elapsed); durations_recorder - .record_duration(metrics::Action::InsertTransactions, transactions_elapsed); + .record_duration(metrics::Action::InsertTransactionSenders, tx_senders_elapsed); durations_recorder - .record_duration(metrics::Action::InsertTxHashNumbers, tx_hash_numbers_elapsed); + .record_duration(metrics::Action::InsertTransactions, transactions_elapsed); + durations_recorder.record_duration( + metrics::Action::InsertTransactionHashNumbers, + tx_hash_numbers_elapsed, + ); if let Some(withdrawals) = block.block.withdrawals { if !withdrawals.is_empty() { @@ -2463,8 +2405,8 @@ impl BlockWriter for DatabaseProvider { durations_recorder.record_relative(metrics::Action::InsertBlockBodyIndices); if !block_indices.is_empty() { - self.tx.put::(block_indices.last_tx_num(), block_number)?; - durations_recorder.record_relative(metrics::Action::InsertTransactionBlock); + self.tx.put::(block_indices.last_tx_num(), block_number)?; + durations_recorder.record_relative(metrics::Action::InsertTransactionBlocks); } debug!( @@ -2505,7 +2447,7 @@ impl BlockWriter for DatabaseProvider { // Write state and changesets to the database. // Must be written after blocks because of the receipt lookup. 
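The block-insertion hunk above accumulates per-phase timings (`tx_senders_elapsed`, `transactions_elapsed`, `tx_hash_numbers_elapsed`) with `Instant` inside the loop and records each total once afterwards under the renamed metric actions. A small standalone sketch of that accumulate-then-report pattern, with dummy work in place of the database puts and a `println!` in place of reth's duration recorder:

```rust
use std::time::{Duration, Instant};

fn main() {
    // Sum each phase across the whole loop and report once at the end, rather
    // than emitting one metric per inserted row.
    let mut senders_elapsed = Duration::ZERO;
    let mut transactions_elapsed = Duration::ZERO;

    for i in 0..1_000u64 {
        let start = Instant::now();
        let _sender = i.wrapping_mul(31); // stand-in for the sender insert
        senders_elapsed += start.elapsed();

        let start = Instant::now();
        let _tx = i.to_be_bytes(); // stand-in for the transaction insert
        transactions_elapsed += start.elapsed();
    }

    println!("senders: {senders_elapsed:?}, transactions: {transactions_elapsed:?}");
}
```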
- state.write_to_db(self.tx_ref(), OriginalValuesKnown::No)?; + state.write_to_storage(self.tx_ref(), None, OriginalValuesKnown::No)?; durations_recorder.record_relative(metrics::Action::InsertState); // insert hashes and intermediate merkle nodes @@ -2547,6 +2489,19 @@ impl PruneCheckpointWriter for DatabaseProvider { } } +impl StatsReader for DatabaseProvider { + fn count_entries(&self) -> ProviderResult { + let db_entries = self.tx.entries::()?; + let static_file_entries = match self.static_file_provider.count_entries::() { + Ok(entries) => entries, + Err(ProviderError::UnsupportedProvider) => 0, + Err(err) => return Err(err), + }; + + Ok(db_entries + static_file_entries) + } +} + fn range_size_hint(range: &impl RangeBounds) -> Option { let start = match range.start_bound().cloned() { Bound::Included(start) => start, diff --git a/crates/storage/provider/src/providers/mod.rs b/crates/storage/provider/src/providers/mod.rs index 56bb9145431b..0fbde23f1323 100644 --- a/crates/storage/provider/src/providers/mod.rs +++ b/crates/storage/provider/src/providers/mod.rs @@ -39,8 +39,11 @@ pub use state::{ mod bundle_state_provider; mod chain_info; mod database; -mod snapshot; -pub use snapshot::{SnapshotJarProvider, SnapshotProvider}; +mod static_file; +pub use static_file::{ + StaticFileJarProvider, StaticFileProvider, StaticFileProviderRW, StaticFileProviderRWRefMut, + StaticFileWriter, +}; mod state; use crate::{providers::chain_info::ChainInfoTracker, traits::BlockSource}; pub use bundle_state_provider::BundleStateProvider; @@ -131,34 +134,34 @@ where Tree: Send + Sync, { fn header(&self, block_hash: &BlockHash) -> ProviderResult> { - self.database.provider()?.header(block_hash) + self.database.header(block_hash) } fn header_by_number(&self, num: BlockNumber) -> ProviderResult> { - self.database.provider()?.header_by_number(num) + self.database.header_by_number(num) } fn header_td(&self, hash: &BlockHash) -> ProviderResult> { - self.database.provider()?.header_td(hash) + self.database.header_td(hash) } fn header_td_by_number(&self, number: BlockNumber) -> ProviderResult> { - self.database.provider()?.header_td_by_number(number) + self.database.header_td_by_number(number) } fn headers_range(&self, range: impl RangeBounds) -> ProviderResult> { - self.database.provider()?.headers_range(range) + self.database.headers_range(range) } fn sealed_header(&self, number: BlockNumber) -> ProviderResult> { - self.database.provider()?.sealed_header(number) + self.database.sealed_header(number) } fn sealed_headers_range( &self, range: impl RangeBounds, ) -> ProviderResult> { - self.database.provider()?.sealed_headers_range(range) + self.database.sealed_headers_range(range) } fn sealed_headers_while( @@ -166,7 +169,7 @@ where range: impl RangeBounds, predicate: impl FnMut(&SealedHeader) -> bool, ) -> ProviderResult> { - self.database.provider()?.sealed_headers_while(range, predicate) + self.database.sealed_headers_while(range, predicate) } } @@ -176,7 +179,7 @@ where Tree: Send + Sync, { fn block_hash(&self, number: u64) -> ProviderResult> { - self.database.provider()?.block_hash(number) + self.database.block_hash(number) } fn canonical_hashes_range( @@ -184,7 +187,7 @@ where start: BlockNumber, end: BlockNumber, ) -> ProviderResult> { - self.database.provider()?.canonical_hashes_range(start, end) + self.database.canonical_hashes_range(start, end) } } @@ -202,11 +205,11 @@ where } fn last_block_number(&self) -> ProviderResult { - self.database.provider()?.last_block_number() + self.database.last_block_number() 
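The new `StatsReader::count_entries` implementation in the hunk above adds the row count of the database table to whatever the static-file provider reports for the matching segment, and maps `ProviderError::UnsupportedProvider` to zero for segments that have no static-file representation. A rough, self-contained model of that error-tolerant sum; the error enum and both fetch functions here are hypothetical stand-ins, not reth's types:

```rust
/// Hypothetical stand-in for reth's `ProviderError`.
#[allow(dead_code)]
#[derive(Debug, PartialEq, Eq)]
enum ProviderError {
    UnsupportedProvider,
    Other(String),
}

fn db_entries() -> Result<usize, ProviderError> {
    Ok(1_024) // pretend the MDBX table holds this many rows
}

fn static_file_entries(supported: bool) -> Result<usize, ProviderError> {
    if supported {
        Ok(4_096)
    } else {
        Err(ProviderError::UnsupportedProvider)
    }
}

/// Sum both tiers, treating an unsupported static-file segment as empty while
/// still surfacing any other error.
fn count_entries(static_files_supported: bool) -> Result<usize, ProviderError> {
    let db = db_entries()?;
    let static_files = match static_file_entries(static_files_supported) {
        Ok(entries) => entries,
        Err(ProviderError::UnsupportedProvider) => 0,
        Err(err) => return Err(err),
    };
    Ok(db + static_files)
}

fn main() -> Result<(), ProviderError> {
    assert_eq!(count_entries(true)?, 5_120);
    assert_eq!(count_entries(false)?, 1_024);
    Ok(())
}
```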
} fn block_number(&self, hash: B256) -> ProviderResult> { - self.database.provider()?.block_number(hash) + self.database.block_number(hash) } } @@ -237,7 +240,7 @@ where let block = match source { BlockSource::Any => { // check database first - let mut block = self.database.provider()?.block_by_hash(hash)?; + let mut block = self.database.block_by_hash(hash)?; if block.is_none() { // Note: it's fine to return the unsealed block because the caller already has // the hash @@ -246,7 +249,7 @@ where block } BlockSource::Pending => self.tree.block_by_hash(hash).map(|block| block.unseal()), - BlockSource::Database => self.database.provider()?.block_by_hash(hash)?, + BlockSource::Database => self.database.block_by_hash(hash)?, }; Ok(block) @@ -255,7 +258,7 @@ where fn block(&self, id: BlockHashOrNumber) -> ProviderResult> { match id { BlockHashOrNumber::Hash(hash) => self.find_block_by_hash(hash, BlockSource::Any), - BlockHashOrNumber::Number(num) => self.database.provider()?.block_by_number(num), + BlockHashOrNumber::Number(num) => self.database.block_by_number(num), } } @@ -272,14 +275,14 @@ where } fn ommers(&self, id: BlockHashOrNumber) -> ProviderResult>> { - self.database.provider()?.ommers(id) + self.database.ommers(id) } fn block_body_indices( &self, number: BlockNumber, ) -> ProviderResult> { - self.database.provider()?.block_body_indices(number) + self.database.block_body_indices(number) } /// Returns the block with senders with matching number or hash from database. @@ -293,11 +296,11 @@ where id: BlockHashOrNumber, transaction_kind: TransactionVariant, ) -> ProviderResult> { - self.database.provider()?.block_with_senders(id, transaction_kind) + self.database.block_with_senders(id, transaction_kind) } fn block_range(&self, range: RangeInclusive) -> ProviderResult> { - self.database.provider()?.block_range(range) + self.database.block_range(range) } } @@ -307,65 +310,65 @@ where Tree: BlockchainTreeViewer + Send + Sync, { fn transaction_id(&self, tx_hash: TxHash) -> ProviderResult> { - self.database.provider()?.transaction_id(tx_hash) + self.database.transaction_id(tx_hash) } fn transaction_by_id(&self, id: TxNumber) -> ProviderResult> { - self.database.provider()?.transaction_by_id(id) + self.database.transaction_by_id(id) } fn transaction_by_id_no_hash( &self, id: TxNumber, ) -> ProviderResult> { - self.database.provider()?.transaction_by_id_no_hash(id) + self.database.transaction_by_id_no_hash(id) } fn transaction_by_hash(&self, hash: TxHash) -> ProviderResult> { - self.database.provider()?.transaction_by_hash(hash) + self.database.transaction_by_hash(hash) } fn transaction_by_hash_with_meta( &self, tx_hash: TxHash, ) -> ProviderResult> { - self.database.provider()?.transaction_by_hash_with_meta(tx_hash) + self.database.transaction_by_hash_with_meta(tx_hash) } fn transaction_block(&self, id: TxNumber) -> ProviderResult> { - self.database.provider()?.transaction_block(id) + self.database.transaction_block(id) } fn transactions_by_block( &self, id: BlockHashOrNumber, ) -> ProviderResult>> { - self.database.provider()?.transactions_by_block(id) + self.database.transactions_by_block(id) } fn transactions_by_block_range( &self, range: impl RangeBounds, ) -> ProviderResult>> { - self.database.provider()?.transactions_by_block_range(range) + self.database.transactions_by_block_range(range) } fn transactions_by_tx_range( &self, range: impl RangeBounds, ) -> ProviderResult> { - self.database.provider()?.transactions_by_tx_range(range) + self.database.transactions_by_tx_range(range) } fn 
senders_by_tx_range( &self, range: impl RangeBounds, ) -> ProviderResult> { - self.database.provider()?.senders_by_tx_range(range) + self.database.senders_by_tx_range(range) } fn transaction_sender(&self, id: TxNumber) -> ProviderResult> { - self.database.provider()?.transaction_sender(id) + self.database.transaction_sender(id) } } @@ -375,22 +378,22 @@ where Tree: Send + Sync, { fn receipt(&self, id: TxNumber) -> ProviderResult> { - self.database.provider()?.receipt(id) + self.database.receipt(id) } fn receipt_by_hash(&self, hash: TxHash) -> ProviderResult> { - self.database.provider()?.receipt_by_hash(hash) + self.database.receipt_by_hash(hash) } fn receipts_by_block(&self, block: BlockHashOrNumber) -> ProviderResult>> { - self.database.provider()?.receipts_by_block(block) + self.database.receipts_by_block(block) } fn receipts_by_tx_range( &self, range: impl RangeBounds, ) -> ProviderResult> { - self.database.provider()?.receipts_by_tx_range(range) + self.database.receipts_by_tx_range(range) } } impl ReceiptProviderIdExt for BlockchainProvider @@ -431,11 +434,11 @@ where id: BlockHashOrNumber, timestamp: u64, ) -> ProviderResult> { - self.database.provider()?.withdrawals_by_block(id, timestamp) + self.database.withdrawals_by_block(id, timestamp) } fn latest_withdrawal(&self) -> ProviderResult> { - self.database.provider()?.latest_withdrawal() + self.database.latest_withdrawal() } } diff --git a/crates/storage/provider/src/providers/snapshot/manager.rs b/crates/storage/provider/src/providers/snapshot/manager.rs deleted file mode 100644 index c46cab325670..000000000000 --- a/crates/storage/provider/src/providers/snapshot/manager.rs +++ /dev/null @@ -1,685 +0,0 @@ -use super::{LoadedJar, SnapshotJarProvider}; -use crate::{ - to_range, BlockHashReader, BlockNumReader, BlockReader, BlockSource, HeaderProvider, - ReceiptProvider, TransactionVariant, TransactionsProvider, TransactionsProviderExt, - WithdrawalsProvider, -}; -use dashmap::DashMap; -use parking_lot::RwLock; -use reth_db::{ - codecs::CompactU256, - models::StoredBlockBodyIndices, - snapshot::{iter_snapshots, HeaderMask, ReceiptMask, SnapshotCursor, TransactionMask}, -}; -use reth_interfaces::provider::{ProviderError, ProviderResult}; -use reth_nippy_jar::NippyJar; -use reth_primitives::{ - snapshot::HighestSnapshots, Address, Block, BlockHash, BlockHashOrNumber, BlockNumber, - BlockWithSenders, ChainInfo, Header, Receipt, SealedBlock, SealedBlockWithSenders, - SealedHeader, SnapshotSegment, TransactionMeta, TransactionSigned, TransactionSignedNoHash, - TxHash, TxNumber, Withdrawal, Withdrawals, B256, U256, -}; -use std::{ - collections::{hash_map::Entry, BTreeMap, HashMap}, - ops::{Range, RangeBounds, RangeInclusive}, - path::{Path, PathBuf}, -}; -use tokio::sync::watch; - -/// Alias type for a map that can be queried for transaction/block ranges from a block/transaction -/// segment respectively. It uses `BlockNumber` to represent the block end of a snapshot range or -/// `TxNumber` to represent the transaction end of a snapshot range. -/// -/// Can be in one of the two formats: -/// - `HashMap>>` -/// - `HashMap>>` -type SegmentRanges = HashMap>>; - -/// [`SnapshotProvider`] manages all existing [`SnapshotJarProvider`]. -#[derive(Debug, Default)] -pub struct SnapshotProvider { - /// Maintains a map which allows for concurrent access to different `NippyJars`, over different - /// segments and ranges. 
- map: DashMap<(BlockNumber, SnapshotSegment), LoadedJar>, - /// Available snapshot transaction ranges on disk indexed by max blocks. - snapshots_block_index: RwLock, - /// Available snapshot block ranges on disk indexed by max transactions. - snapshots_tx_index: RwLock, - /// Tracks the highest snapshot of every segment. - highest_tracker: Option>>, - /// Directory where snapshots are located - path: PathBuf, - /// Whether [`SnapshotJarProvider`] loads filters into memory. If not, `by_hash` queries won't - /// be able to be queried directly. - load_filters: bool, -} - -impl SnapshotProvider { - /// Creates a new [`SnapshotProvider`]. - pub fn new(path: impl AsRef) -> ProviderResult { - let provider = Self { - map: Default::default(), - snapshots_block_index: Default::default(), - snapshots_tx_index: Default::default(), - highest_tracker: None, - path: path.as_ref().to_path_buf(), - load_filters: false, - }; - - provider.update_index()?; - Ok(provider) - } - - /// Loads filters into memory when creating a [`SnapshotJarProvider`]. - pub fn with_filters(mut self) -> Self { - self.load_filters = true; - self - } - - /// Adds a highest snapshot tracker to the provider - pub fn with_highest_tracker( - mut self, - highest_tracker: Option>>, - ) -> Self { - self.highest_tracker = highest_tracker; - self - } - - /// Gets the [`SnapshotJarProvider`] of the requested segment and block. - pub fn get_segment_provider_from_block( - &self, - segment: SnapshotSegment, - block: BlockNumber, - path: Option<&Path>, - ) -> ProviderResult> { - self.get_segment_provider( - segment, - || self.get_segment_ranges_from_block(segment, block), - path, - )? - .ok_or_else(|| ProviderError::MissingSnapshotBlock(segment, block)) - } - - /// Gets the [`SnapshotJarProvider`] of the requested segment and transaction. - pub fn get_segment_provider_from_transaction( - &self, - segment: SnapshotSegment, - tx: TxNumber, - path: Option<&Path>, - ) -> ProviderResult> { - self.get_segment_provider( - segment, - || self.get_segment_ranges_from_transaction(segment, tx), - path, - )? - .ok_or_else(|| ProviderError::MissingSnapshotTx(segment, tx)) - } - - /// Gets the [`SnapshotJarProvider`] of the requested segment and block or transaction. - pub fn get_segment_provider( - &self, - segment: SnapshotSegment, - fn_ranges: impl Fn() -> Option<(RangeInclusive, RangeInclusive)>, - path: Option<&Path>, - ) -> ProviderResult>> { - // If we have a path, then get the block range and transaction range from its name. - // Otherwise, check `self.available_snapshots` - let snapshot_ranges = match path { - Some(path) => { - SnapshotSegment::parse_filename(path.file_name().ok_or_else(|| { - ProviderError::MissingSnapshotPath(segment, path.to_path_buf()) - })?) - .and_then(|(parsed_segment, block_range, tx_range)| { - if parsed_segment == segment { - return Some((block_range, tx_range)) - } - None - }) - } - None => fn_ranges(), - }; - - // Return cached `LoadedJar` or insert it for the first time, and then, return it. - if let Some((block_range, tx_range)) = snapshot_ranges { - return Ok(Some(self.get_or_create_jar_provider(segment, &block_range, &tx_range)?)) - } - - Ok(None) - } - - /// Given a segment, block range and transaction range it returns a cached - /// [`SnapshotJarProvider`]. TODO: we should check the size and pop N if there's too many. 
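`get_or_create_jar_provider` above returns the cached `LoadedJar` when one already exists for the `(range end, segment)` key and loads it from disk only on a miss. A simplified, single-threaded sketch of that memoization using a `HashMap` entry; the real code uses `dashmap::DashMap` so concurrent readers can share the cache, and `LoadedJar`/`load_from_disk` below are placeholders rather than reth types.

```rust
use std::collections::HashMap;

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum Segment {
    Headers,
    Transactions,
    Receipts,
}

/// Placeholder for the memory-mapped file handle kept by the provider.
#[derive(Debug)]
struct LoadedJar {
    path: String,
}

fn load_from_disk(segment: Segment, block_end: u64) -> LoadedJar {
    // In reth this is where the jar file is opened (and filters optionally loaded).
    LoadedJar { path: format!("{segment:?}_{block_end}.jar") }
}

fn get_or_create<'a>(
    cache: &'a mut HashMap<(u64, Segment), LoadedJar>,
    segment: Segment,
    block_end: u64,
) -> &'a LoadedJar {
    cache
        .entry((block_end, segment))
        .or_insert_with(|| load_from_disk(segment, block_end))
}

fn main() {
    let mut cache = HashMap::new();
    let first = get_or_create(&mut cache, Segment::Headers, 499_999).path.clone();
    // The second lookup for the same key is served from the cache, no disk access.
    let second = get_or_create(&mut cache, Segment::Headers, 499_999).path.clone();
    assert_eq!(first, second);
    assert_eq!(cache.len(), 1);
}
```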
- fn get_or_create_jar_provider( - &self, - segment: SnapshotSegment, - block_range: &RangeInclusive, - tx_range: &RangeInclusive, - ) -> ProviderResult> { - let key = (*block_range.end(), segment); - let entry = match self.map.entry(key) { - dashmap::mapref::entry::Entry::Occupied(entry) => entry.into_ref(), - dashmap::mapref::entry::Entry::Vacant(entry) => { - let path = self.path.join(segment.filename(block_range, tx_range)); - let mut jar = NippyJar::load(&path)?; - if self.load_filters { - jar.load_filters()?; - } - let loaded_jar = LoadedJar::new(jar)?; - entry.insert(loaded_jar) - } - }; - Ok(entry.downgrade().into()) - } - - /// Gets a snapshot segment's block range and transaction range from the provider inner block - /// index. - fn get_segment_ranges_from_block( - &self, - segment: SnapshotSegment, - block: u64, - ) -> Option<(RangeInclusive, RangeInclusive)> { - let snapshots = self.snapshots_block_index.read(); - let segment_snapshots = snapshots.get(&segment)?; - - // It's more probable that the request comes from a newer block height, so we iterate - // the snapshots in reverse. - let mut snapshots_rev_iter = segment_snapshots.iter().rev().peekable(); - - while let Some((block_end, tx_range)) = snapshots_rev_iter.next() { - if block > *block_end { - // request block is higher than highest snapshot block - return None - } - // `unwrap_or(0) is safe here as it sets block_start to 0 if the iterator is empty, - // indicating the lowest height snapshot has been reached. - let block_start = - snapshots_rev_iter.peek().map(|(block_end, _)| *block_end + 1).unwrap_or(0); - if block_start <= block { - return Some((block_start..=*block_end, tx_range.clone())) - } - } - None - } - - /// Gets a snapshot segment's block range and transaction range from the provider inner - /// transaction index. - fn get_segment_ranges_from_transaction( - &self, - segment: SnapshotSegment, - tx: u64, - ) -> Option<(RangeInclusive, RangeInclusive)> { - let snapshots = self.snapshots_tx_index.read(); - let segment_snapshots = snapshots.get(&segment)?; - - // It's more probable that the request comes from a newer tx height, so we iterate - // the snapshots in reverse. - let mut snapshots_rev_iter = segment_snapshots.iter().rev().peekable(); - - while let Some((tx_end, block_range)) = snapshots_rev_iter.next() { - if tx > *tx_end { - // request tx is higher than highest snapshot tx - return None - } - let tx_start = snapshots_rev_iter.peek().map(|(tx_end, _)| *tx_end + 1).unwrap_or(0); - if tx_start <= tx { - return Some((block_range.clone(), tx_start..=*tx_end)) - } - } - None - } - - /// Updates the inner transaction and block index - pub fn update_index(&self) -> ProviderResult<()> { - let mut block_index = self.snapshots_block_index.write(); - let mut tx_index = self.snapshots_tx_index.write(); - - for (segment, ranges) in iter_snapshots(&self.path)? { - for (block_range, tx_range) in ranges { - let block_end = *block_range.end(); - let tx_end = *tx_range.end(); - - match tx_index.entry(segment) { - Entry::Occupied(mut index) => { - index.get_mut().insert(tx_end, block_range); - } - Entry::Vacant(index) => { - index.insert(BTreeMap::from([(tx_end, block_range)])); - } - }; - - match block_index.entry(segment) { - Entry::Occupied(mut index) => { - index.get_mut().insert(block_end, tx_range); - } - Entry::Vacant(index) => { - index.insert(BTreeMap::from([(block_end, tx_range)])); - } - }; - } - } - - Ok(()) - } - - /// Gets the highest snapshot block if it exists for a snapshot segment. 
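The index lookups above answer "which file holds block N?" by scanning a `BTreeMap` keyed by each file's highest block in reverse, on the assumption that most queries target recent heights. A compact sketch of that reverse scan with plain strings standing in for the jar handles; the file naming is illustrative only.

```rust
use std::collections::BTreeMap;
use std::ops::RangeInclusive;

/// Given an index keyed by the *highest* block of each file, find the file
/// whose block range contains `block`. The previous entry's end + 1 is the
/// current file's start; the lowest file starts at block 0.
fn find_range(index: &BTreeMap<u64, String>, block: u64) -> Option<(RangeInclusive<u64>, &str)> {
    let mut iter = index.iter().rev().peekable();
    while let Some((block_end, file)) = iter.next() {
        if block > *block_end {
            // Higher than the highest indexed block: not stored in any file.
            return None;
        }
        let block_start = iter.peek().map(|(end, _)| *end + 1).unwrap_or(0);
        if block_start <= block {
            return Some((block_start..=*block_end, file.as_str()));
        }
    }
    None
}

fn main() {
    let mut index = BTreeMap::new();
    index.insert(499_999, "headers_0_499999".to_string());
    index.insert(999_999, "headers_500000_999999".to_string());

    assert_eq!(find_range(&index, 123).map(|(r, _)| r), Some(0..=499_999));
    assert_eq!(find_range(&index, 500_000).map(|(_, f)| f), Some("headers_500000_999999"));
    assert_eq!(find_range(&index, 1_000_000), None);
}
```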
- pub fn get_highest_snapshot_block(&self, segment: SnapshotSegment) -> Option { - self.snapshots_block_index - .read() - .get(&segment) - .and_then(|index| index.last_key_value().map(|(last_block, _)| *last_block)) - } - - /// Gets the highest snapshotted transaction. - pub fn get_highest_snapshot_tx(&self, segment: SnapshotSegment) -> Option { - self.snapshots_tx_index - .read() - .get(&segment) - .and_then(|index| index.last_key_value().map(|(last_tx, _)| *last_tx)) - } - - /// Iterates through segment snapshots in reverse order, executing a function until it returns - /// some object. Useful for finding objects by [`TxHash`] or [`BlockHash`]. - pub fn find_snapshot( - &self, - segment: SnapshotSegment, - func: impl Fn(SnapshotJarProvider<'_>) -> ProviderResult>, - ) -> ProviderResult> { - let snapshots = self.snapshots_block_index.read(); - if let Some(segment_snapshots) = snapshots.get(&segment) { - // It's more probable that the request comes from a newer block height, so we iterate - // the snapshots in reverse. - let mut snapshots_rev_iter = segment_snapshots.iter().rev().peekable(); - - while let Some((block_end, tx_range)) = snapshots_rev_iter.next() { - // `unwrap_or(0) is safe here as it sets block_start to 0 if the iterator - // is empty, indicating the lowest height snapshot has been reached. - let block_start = - snapshots_rev_iter.peek().map(|(block_end, _)| *block_end + 1).unwrap_or(0); - - if let Some(res) = func(self.get_or_create_jar_provider( - segment, - &(block_start..=*block_end), - tx_range, - )?)? { - return Ok(Some(res)) - } - } - } - - Ok(None) - } - - /// Fetches data within a specified range across multiple snapshot files. - /// - /// This function iteratively retrieves data using `get_fn` for each item in the given range. - /// It continues fetching until the end of the range is reached or the provided `predicate` - /// returns false. - pub fn fetch_range( - &self, - segment: SnapshotSegment, - range: Range, - get_fn: F, - mut predicate: P, - ) -> ProviderResult> - where - F: Fn(&mut SnapshotCursor<'_>, u64) -> ProviderResult>, - P: FnMut(&T) -> bool, - { - let get_provider = |start: u64| match segment { - SnapshotSegment::Headers => self.get_segment_provider_from_block(segment, start, None), - SnapshotSegment::Transactions | SnapshotSegment::Receipts => { - self.get_segment_provider_from_transaction(segment, start, None) - } - }; - - let mut result = Vec::with_capacity((range.end - range.start).min(100) as usize); - let mut provider = get_provider(range.start)?; - let mut cursor = provider.cursor()?; - - // advances number in range - 'outer: for number in range { - // advances snapshot files if `get_fn` returns None - 'inner: loop { - match get_fn(&mut cursor, number)? { - Some(res) => { - if !predicate(&res) { - break 'outer - } - result.push(res); - break 'inner - } - None => { - provider = get_provider(number)?; - cursor = provider.cursor()?; - } - } - } - } - - Ok(result) - } -} - -impl HeaderProvider for SnapshotProvider { - fn header(&self, block_hash: &BlockHash) -> ProviderResult> { - self.find_snapshot(SnapshotSegment::Headers, |jar_provider| { - Ok(jar_provider - .cursor()? - .get_two::>(block_hash.into())? - .and_then(|(header, hash)| { - if &hash == block_hash { - return Some(header) - } - None - })) - }) - } - - fn header_by_number(&self, num: BlockNumber) -> ProviderResult> { - self.get_segment_provider_from_block(SnapshotSegment::Headers, num, None)? 
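`fetch_range` above walks a numeric range and only switches providers when the current cursor can no longer serve a number, so a query spanning several files pays the file-lookup cost once per file rather than once per item, and stops early as soon as the predicate rejects a result. A rough model of that loop with plain vectors standing in for the jar files; no reth types are involved.

```rust
/// Each inner vector models one static file; `start` is the first item number
/// the file holds. Files are contiguous and sorted by `start`.
struct Files {
    files: Vec<(u64, Vec<String>)>,
}

impl Files {
    /// Find the file containing `number` (the lookup we want to amortize).
    fn provider_for(&self, number: u64) -> Option<&(u64, Vec<String>)> {
        self.files
            .iter()
            .rev()
            .find(|(start, items)| *start <= number && number < *start + items.len() as u64)
    }

    /// Fetch `range`, switching files only when the current one runs out and
    /// stopping early once `predicate` rejects an item.
    fn fetch_range(
        &self,
        range: std::ops::Range<u64>,
        mut predicate: impl FnMut(&str) -> bool,
    ) -> Vec<String> {
        let mut result = Vec::new();
        let mut current = match self.provider_for(range.start) {
            Some(file) => file,
            None => return result,
        };
        for number in range {
            loop {
                let (start, items) = current;
                match items.get((number - start) as usize) {
                    Some(item) => {
                        if !predicate(item) {
                            return result;
                        }
                        result.push(item.clone());
                        break;
                    }
                    // Past the end of this file: advance to the one holding `number`.
                    None => match self.provider_for(number) {
                        Some(next) => current = next,
                        None => return result,
                    },
                }
            }
        }
        result
    }
}

fn main() {
    let files = Files {
        files: vec![
            (0, vec!["h0".into(), "h1".into(), "h2".into()]),
            (3, vec!["h3".into(), "h4".into()]),
        ],
    };
    let out = files.fetch_range(1..5, |_| true);
    assert_eq!(out, vec!["h1", "h2", "h3", "h4"]);
}
```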
- .header_by_number(num) - } - - fn header_td(&self, block_hash: &BlockHash) -> ProviderResult> { - self.find_snapshot(SnapshotSegment::Headers, |jar_provider| { - Ok(jar_provider - .cursor()? - .get_two::>(block_hash.into())? - .and_then(|(td, hash)| (&hash == block_hash).then_some(td.0))) - }) - } - - fn header_td_by_number(&self, num: BlockNumber) -> ProviderResult> { - self.get_segment_provider_from_block(SnapshotSegment::Headers, num, None)? - .header_td_by_number(num) - } - - fn headers_range(&self, range: impl RangeBounds) -> ProviderResult> { - self.fetch_range( - SnapshotSegment::Headers, - to_range(range), - |cursor, number| cursor.get_one::>(number.into()), - |_| true, - ) - } - - fn sealed_header(&self, num: BlockNumber) -> ProviderResult> { - self.get_segment_provider_from_block(SnapshotSegment::Headers, num, None)? - .sealed_header(num) - } - - fn sealed_headers_while( - &self, - range: impl RangeBounds, - predicate: impl FnMut(&SealedHeader) -> bool, - ) -> ProviderResult> { - self.fetch_range( - SnapshotSegment::Headers, - to_range(range), - |cursor, number| { - Ok(cursor - .get_two::>(number.into())? - .map(|(header, hash)| header.seal(hash))) - }, - predicate, - ) - } -} - -impl BlockHashReader for SnapshotProvider { - fn block_hash(&self, num: u64) -> ProviderResult> { - self.get_segment_provider_from_block(SnapshotSegment::Headers, num, None)?.block_hash(num) - } - - fn canonical_hashes_range( - &self, - start: BlockNumber, - end: BlockNumber, - ) -> ProviderResult> { - self.fetch_range( - SnapshotSegment::Headers, - start..end, - |cursor, number| cursor.get_one::>(number.into()), - |_| true, - ) - } -} - -impl ReceiptProvider for SnapshotProvider { - fn receipt(&self, num: TxNumber) -> ProviderResult> { - self.get_segment_provider_from_transaction(SnapshotSegment::Receipts, num, None)? - .receipt(num) - } - - fn receipt_by_hash(&self, hash: TxHash) -> ProviderResult> { - if let Some(num) = self.transaction_id(hash)? { - return self.receipt(num) - } - Ok(None) - } - - fn receipts_by_block(&self, _block: BlockHashOrNumber) -> ProviderResult>> { - unreachable!() - } - - fn receipts_by_tx_range( - &self, - range: impl RangeBounds, - ) -> ProviderResult> { - self.fetch_range( - SnapshotSegment::Receipts, - to_range(range), - |cursor, number| cursor.get_one::>(number.into()), - |_| true, - ) - } -} - -impl TransactionsProviderExt for SnapshotProvider { - fn transaction_hashes_by_range( - &self, - tx_range: Range, - ) -> ProviderResult> { - self.fetch_range( - SnapshotSegment::Transactions, - tx_range, - |cursor, number| { - let tx = - cursor.get_one::>(number.into())?; - Ok(tx.map(|tx| (tx.hash(), cursor.number()))) - }, - |_| true, - ) - } -} - -impl TransactionsProvider for SnapshotProvider { - fn transaction_id(&self, tx_hash: TxHash) -> ProviderResult> { - self.find_snapshot(SnapshotSegment::Transactions, |jar_provider| { - let mut cursor = jar_provider.cursor()?; - if cursor - .get_one::>((&tx_hash).into())? - .and_then(|tx| (tx.hash() == tx_hash).then_some(tx)) - .is_some() - { - Ok(Some(cursor.number())) - } else { - Ok(None) - } - }) - } - - fn transaction_by_id(&self, num: TxNumber) -> ProviderResult> { - self.get_segment_provider_from_transaction(SnapshotSegment::Transactions, num, None)? - .transaction_by_id(num) - } - - fn transaction_by_id_no_hash( - &self, - num: TxNumber, - ) -> ProviderResult> { - self.get_segment_provider_from_transaction(SnapshotSegment::Transactions, num, None)? 
- .transaction_by_id_no_hash(num) - } - - fn transaction_by_hash(&self, hash: TxHash) -> ProviderResult> { - self.find_snapshot(SnapshotSegment::Transactions, |jar_provider| { - Ok(jar_provider - .cursor()? - .get_one::>((&hash).into())? - .map(|tx| tx.with_hash()) - .and_then(|tx| (tx.hash_ref() == &hash).then_some(tx))) - }) - } - - fn transaction_by_hash_with_meta( - &self, - _hash: TxHash, - ) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn transaction_block(&self, _id: TxNumber) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn transactions_by_block( - &self, - _block_id: BlockHashOrNumber, - ) -> ProviderResult>> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn transactions_by_block_range( - &self, - _range: impl RangeBounds, - ) -> ProviderResult>> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn senders_by_tx_range( - &self, - range: impl RangeBounds, - ) -> ProviderResult> { - let txes = self.transactions_by_tx_range(range)?; - TransactionSignedNoHash::recover_signers(&txes, txes.len()) - .ok_or(ProviderError::SenderRecoveryError) - } - - fn transactions_by_tx_range( - &self, - range: impl RangeBounds, - ) -> ProviderResult> { - self.fetch_range( - SnapshotSegment::Transactions, - to_range(range), - |cursor, number| { - cursor.get_one::>(number.into()) - }, - |_| true, - ) - } - - fn transaction_sender(&self, id: TxNumber) -> ProviderResult> { - Ok(self.transaction_by_id_no_hash(id)?.and_then(|tx| tx.recover_signer())) - } -} - -/* Cannot be successfully implemented but must exist for trait requirements */ - -impl BlockNumReader for SnapshotProvider { - fn chain_info(&self) -> ProviderResult { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn best_block_number(&self) -> ProviderResult { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn last_block_number(&self) -> ProviderResult { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn block_number(&self, _hash: B256) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } -} - -impl BlockReader for SnapshotProvider { - fn find_block_by_hash( - &self, - _hash: B256, - _source: BlockSource, - ) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn block(&self, _id: BlockHashOrNumber) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn pending_block(&self) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn pending_block_with_senders(&self) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn pending_block_and_receipts(&self) -> ProviderResult)>> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn ommers(&self, _id: BlockHashOrNumber) -> ProviderResult>> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn block_body_indices(&self, _num: u64) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn 
block_with_senders( - &self, - _id: BlockHashOrNumber, - _transaction_kind: TransactionVariant, - ) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn block_range(&self, _range: RangeInclusive) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } -} - -impl WithdrawalsProvider for SnapshotProvider { - fn withdrawals_by_block( - &self, - _id: BlockHashOrNumber, - _timestamp: u64, - ) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } - - fn latest_withdrawal(&self) -> ProviderResult> { - // Required data not present in snapshots - Err(ProviderError::UnsupportedProvider) - } -} diff --git a/crates/storage/provider/src/providers/state/historical.rs b/crates/storage/provider/src/providers/state/historical.rs index 8cc9bb347635..55b1bec1d910 100644 --- a/crates/storage/provider/src/providers/state/historical.rs +++ b/crates/storage/provider/src/providers/state/historical.rs @@ -1,6 +1,7 @@ use crate::{ - providers::state::macros::delegate_provider_impls, AccountReader, BlockHashReader, - BundleStateWithReceipts, ProviderError, StateProvider, StateRootProvider, + providers::{state::macros::delegate_provider_impls, StaticFileProvider}, + AccountReader, BlockHashReader, BundleStateWithReceipts, ProviderError, StateProvider, + StateRootProvider, }; use reth_db::{ cursor::{DbCursorRO, DbDupCursorRO}, @@ -13,9 +14,10 @@ use reth_db::{ use reth_interfaces::provider::ProviderResult; use reth_primitives::{ constants::EPOCH_SLOTS, trie::AccountProof, Account, Address, BlockNumber, Bytecode, - StorageKey, StorageValue, B256, + StaticFileSegment, StorageKey, StorageValue, B256, }; use reth_trie::{updates::TrieUpdates, HashedPostState}; +use std::fmt::Debug; /// State provider for a given block number which takes a tx reference. /// @@ -23,11 +25,11 @@ use reth_trie::{updates::TrieUpdates, HashedPostState}; /// It means that all changes made in the provided block number are not included. /// /// Historical state provider reads the following tables: -/// - [tables::AccountHistory] +/// - [tables::AccountsHistory] /// - [tables::Bytecodes] -/// - [tables::StorageHistory] -/// - [tables::AccountChangeSet] -/// - [tables::StorageChangeSet] +/// - [tables::StoragesHistory] +/// - [tables::AccountChangeSets] +/// - [tables::StorageChangeSets] #[derive(Debug)] pub struct HistoricalStateProviderRef<'b, TX: DbTx> { /// Transaction @@ -36,6 +38,8 @@ pub struct HistoricalStateProviderRef<'b, TX: DbTx> { block_number: BlockNumber, /// Lowest blocks at which different parts of the state are available. 
lowest_available_blocks: LowestAvailableBlocks, + /// Static File provider + static_file_provider: StaticFileProvider, } #[derive(Debug, Eq, PartialEq)] @@ -48,8 +52,12 @@ pub enum HistoryInfo { impl<'b, TX: DbTx> HistoricalStateProviderRef<'b, TX> { /// Create new StateProvider for historical block number - pub fn new(tx: &'b TX, block_number: BlockNumber) -> Self { - Self { tx, block_number, lowest_available_blocks: Default::default() } + pub fn new( + tx: &'b TX, + block_number: BlockNumber, + static_file_provider: StaticFileProvider, + ) -> Self { + Self { tx, block_number, lowest_available_blocks: Default::default(), static_file_provider } } /// Create new StateProvider for historical block number and lowest block numbers at which @@ -58,11 +66,12 @@ impl<'b, TX: DbTx> HistoricalStateProviderRef<'b, TX> { tx: &'b TX, block_number: BlockNumber, lowest_available_blocks: LowestAvailableBlocks, + static_file_provider: StaticFileProvider, ) -> Self { - Self { tx, block_number, lowest_available_blocks } + Self { tx, block_number, lowest_available_blocks, static_file_provider } } - /// Lookup an account in the AccountHistory table + /// Lookup an account in the AccountsHistory table pub fn account_history_lookup(&self, address: Address) -> ProviderResult { if !self.lowest_available_blocks.is_account_history_available(self.block_number) { return Err(ProviderError::StateAtBlockPruned(self.block_number)) @@ -70,14 +79,14 @@ impl<'b, TX: DbTx> HistoricalStateProviderRef<'b, TX> { // history key to search IntegerList of block number changesets. let history_key = ShardedKey::new(address, self.block_number); - self.history_info::( + self.history_info::( history_key, |key| key.key == address, self.lowest_available_blocks.account_history_block_number, ) } - /// Lookup a storage key in the StorageHistory table + /// Lookup a storage key in the StoragesHistory table pub fn storage_history_lookup( &self, address: Address, @@ -89,7 +98,7 @@ impl<'b, TX: DbTx> HistoricalStateProviderRef<'b, TX> { // history key to search IntegerList of block number changesets. let history_key = StorageShardedKey::new(address, storage_key, self.block_number); - self.history_info::( + self.history_info::( history_key, |key| key.address == address && key.sharded_key.key == storage_key, self.lowest_available_blocks.storage_history_block_number, @@ -104,10 +113,14 @@ impl<'b, TX: DbTx> HistoricalStateProviderRef<'b, TX> { return Err(ProviderError::StateAtBlockPruned(self.block_number)) } - let (tip, _) = self + let tip = self .tx .cursor_read::()? .last()? + .map(|(tip, _)| tip) + .or_else(|| { + self.static_file_provider.get_highest_static_file_block(StaticFileSegment::Headers) + }) .ok_or(ProviderError::BestBlockNotFound)?; if tip.saturating_sub(self.block_number) > EPOCH_SLOTS { @@ -136,10 +149,16 @@ impl<'b, TX: DbTx> HistoricalStateProviderRef<'b, TX> { // index, the first chunk for the next key will be returned so we filter out chunks that // have a different key. if let Some(chunk) = cursor.seek(key)?.filter(|(key, _)| key_filter(key)).map(|x| x.1 .0) { - let chunk = chunk.enable_rank(); + // Get the rank of the first entry before or equal to our block. + let mut rank = chunk.rank(self.block_number); - // Get the rank of the first entry after our block. - let rank = chunk.rank(self.block_number as usize); + // Adjust the rank, so that we have the rank of the first entry strictly before our + // block (not equal to it). 
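The rank/select arithmetic above (continued in the next hunk) boils down to one question per lookup: in the sorted list of blocks at which this key changed, what is the first change at or after the queried block? If such an entry exists, the historical value is read from that block's changeset; if the shard has no later entry, the current plain-state value still applies; and if the queried block precedes the very first recorded change and no earlier shard exists, the key had not been written yet. A minimal sketch of that classification over a plain sorted `Vec<u64>`, reusing the `HistoryInfo` variant names from the hunk; reth stores these lists as compressed integer lists with rank/select support and also handles the pruned-history case, both of which this deliberately skips.

```rust
/// Mirrors the outcomes of the provider's `HistoryInfo`.
#[derive(Debug, PartialEq, Eq)]
enum HistoryInfo {
    NotYetWritten,
    InChangeset(u64),
    MaybeInPlainState,
}

/// `changed_at` is the sorted list of blocks at which the key changed.
/// `has_earlier_shard` says whether an older shard exists for this key
/// (the real code checks this with a cursor step backwards).
fn classify(changed_at: &[u64], block: u64, has_earlier_shard: bool) -> HistoryInfo {
    // Index of the first change at or after the queried block.
    let rank = changed_at.partition_point(|&b| b < block);
    match changed_at.get(rank) {
        // Queried block precedes the first change ever recorded for this key.
        Some(_) if rank == 0 && changed_at[0] != block && !has_earlier_shard => {
            HistoryInfo::NotYetWritten
        }
        // The pre-state lives in this block's changeset.
        Some(&change_block) => HistoryInfo::InChangeset(change_block),
        // No later change: the plain-state value is still the answer.
        None => HistoryInfo::MaybeInPlainState,
    }
}

fn main() {
    let changed_at = [3, 7, 15];
    assert_eq!(classify(&changed_at, 1, false), HistoryInfo::NotYetWritten);
    assert_eq!(classify(&changed_at, 3, false), HistoryInfo::InChangeset(3));
    assert_eq!(classify(&changed_at, 9, false), HistoryInfo::InChangeset(15));
    assert_eq!(classify(&changed_at, 100, false), HistoryInfo::MaybeInPlainState);
}
```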
+ if rank.checked_sub(1).and_then(|rank| chunk.select(rank)) == Some(self.block_number) { + rank -= 1 + }; + + let block_number = chunk.select(rank); // If our block is before the first entry in the index chunk and this first entry // doesn't equal to our block, it might be before the first write ever. To check, we @@ -148,20 +167,21 @@ impl<'b, TX: DbTx> HistoricalStateProviderRef<'b, TX> { // short-circuit) and when it passes we save a full seek into the changeset/plain state // table. if rank == 0 && - chunk.select(rank) as u64 != self.block_number && + block_number != Some(self.block_number) && !cursor.prev()?.is_some_and(|(key, _)| key_filter(&key)) { - if lowest_available_block_number.is_some() { + if let (Some(_), Some(block_number)) = (lowest_available_block_number, block_number) + { // The key may have been written, but due to pruning we may not have changesets // and history, so we need to make a changeset lookup. - Ok(HistoryInfo::InChangeset(chunk.select(rank) as u64)) + Ok(HistoryInfo::InChangeset(block_number)) } else { // The key is written to, but only after our block. Ok(HistoryInfo::NotYetWritten) } - } else if rank < chunk.len() { + } else if let Some(block_number) = block_number { // The chunk contains an entry for a write after our block, return it. - Ok(HistoryInfo::InChangeset(chunk.select(rank) as u64)) + Ok(HistoryInfo::InChangeset(block_number)) } else { // The chunk does not contain an entry for a write after our block. This can only // happen if this is the last chunk and so we need to look in the plain state. @@ -185,7 +205,7 @@ impl<'b, TX: DbTx> AccountReader for HistoricalStateProviderRef<'b, TX> { HistoryInfo::NotYetWritten => Ok(None), HistoryInfo::InChangeset(changeset_block_number) => Ok(self .tx - .cursor_dup_read::()? + .cursor_dup_read::()? .seek_by_key_subkey(changeset_block_number, address)? .filter(|acc| acc.address == address) .ok_or(ProviderError::AccountChangesetNotFound { @@ -203,7 +223,12 @@ impl<'b, TX: DbTx> AccountReader for HistoricalStateProviderRef<'b, TX> { impl<'b, TX: DbTx> BlockHashReader for HistoricalStateProviderRef<'b, TX> { /// Get block hash by number. fn block_hash(&self, number: u64) -> ProviderResult> { - self.tx.get::(number).map_err(Into::into) + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, + number, + |static_file| static_file.block_hash(number), + || Ok(self.tx.get::(number)?), + ) } fn canonical_hashes_range( @@ -211,16 +236,23 @@ impl<'b, TX: DbTx> BlockHashReader for HistoricalStateProviderRef<'b, TX> { start: BlockNumber, end: BlockNumber, ) -> ProviderResult> { - let range = start..end; - self.tx - .cursor_read::() - .map(|mut cursor| { - cursor - .walk_range(range)? - .map(|result| result.map(|(_, hash)| hash).map_err(Into::into)) - .collect::>>() - })? - .map_err(Into::into) + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Headers, + start..end, + |static_file, range, _| static_file.canonical_hashes_range(range.start, range.end), + |range, _| { + self.tx + .cursor_read::() + .map(|mut cursor| { + cursor + .walk_range(range)? + .map(|result| result.map(|(_, hash)| hash).map_err(Into::into)) + .collect::>>() + })? + .map_err(Into::into) + }, + |_| true, + ) } } @@ -254,7 +286,7 @@ impl<'b, TX: DbTx> StateProvider for HistoricalStateProviderRef<'b, TX> { HistoryInfo::NotYetWritten => Ok(None), HistoryInfo::InChangeset(changeset_block_number) => Ok(Some( self.tx - .cursor_dup_read::()? + .cursor_dup_read::()? 
.seek_by_key_subkey((changeset_block_number, address).into(), storage_key)? .filter(|entry| entry.key == storage_key) .ok_or_else(|| ProviderError::StorageChangesetNotFound { @@ -295,12 +327,18 @@ pub struct HistoricalStateProvider { block_number: BlockNumber, /// Lowest blocks at which different parts of the state are available. lowest_available_blocks: LowestAvailableBlocks, + /// Static File provider + static_file_provider: StaticFileProvider, } impl HistoricalStateProvider { /// Create new StateProvider for historical block number - pub fn new(tx: TX, block_number: BlockNumber) -> Self { - Self { tx, block_number, lowest_available_blocks: Default::default() } + pub fn new( + tx: TX, + block_number: BlockNumber, + static_file_provider: StaticFileProvider, + ) -> Self { + Self { tx, block_number, lowest_available_blocks: Default::default(), static_file_provider } } /// Set the lowest block number at which the account history is available. @@ -328,6 +366,7 @@ impl HistoricalStateProvider { &self.tx, self.block_number, self.lowest_available_blocks, + self.static_file_provider.clone(), ) } } @@ -367,13 +406,12 @@ impl LowestAvailableBlocks { mod tests { use crate::{ providers::state::historical::{HistoryInfo, LowestAvailableBlocks}, + test_utils::create_test_provider_factory, AccountReader, HistoricalStateProvider, HistoricalStateProviderRef, StateProvider, }; use reth_db::{ - database::Database, models::{storage_sharded_key::StorageShardedKey, AccountBeforeTx, ShardedKey}, tables, - test_utils::create_test_rw_db, transaction::{DbTx, DbTxMut}, BlockNumberList, }; @@ -392,20 +430,21 @@ mod tests { #[test] fn history_provider_get_account() { - let db = create_test_rw_db(); - let tx = db.tx_mut().unwrap(); + let factory = create_test_provider_factory(); + let tx = factory.provider_rw().unwrap().into_tx(); + let static_file_provider = factory.static_file_provider(); - tx.put::( + tx.put::( ShardedKey { key: ADDRESS, highest_block_number: 7 }, BlockNumberList::new([1, 3, 7]).unwrap(), ) .unwrap(); - tx.put::( + tx.put::( ShardedKey { key: ADDRESS, highest_block_number: u64::MAX }, BlockNumberList::new([10, 15]).unwrap(), ) .unwrap(); - tx.put::( + tx.put::( ShardedKey { key: HIGHER_ADDRESS, highest_block_number: u64::MAX }, BlockNumberList::new([4]).unwrap(), ) @@ -420,29 +459,29 @@ mod tests { let higher_acc_plain = Account { nonce: 4, balance: U256::ZERO, bytecode_hash: None }; // setup - tx.put::(1, AccountBeforeTx { address: ADDRESS, info: None }) + tx.put::(1, AccountBeforeTx { address: ADDRESS, info: None }) .unwrap(); - tx.put::( + tx.put::( 3, AccountBeforeTx { address: ADDRESS, info: Some(acc_at3) }, ) .unwrap(); - tx.put::( + tx.put::( 4, AccountBeforeTx { address: HIGHER_ADDRESS, info: None }, ) .unwrap(); - tx.put::( + tx.put::( 7, AccountBeforeTx { address: ADDRESS, info: Some(acc_at7) }, ) .unwrap(); - tx.put::( + tx.put::( 10, AccountBeforeTx { address: ADDRESS, info: Some(acc_at10) }, ) .unwrap(); - tx.put::( + tx.put::( 15, AccountBeforeTx { address: ADDRESS, info: Some(acc_at15) }, ) @@ -453,56 +492,75 @@ mod tests { tx.put::(HIGHER_ADDRESS, higher_acc_plain).unwrap(); tx.commit().unwrap(); - let tx = db.tx().unwrap(); + let tx = factory.provider().unwrap().into_tx(); // run - assert_eq!(HistoricalStateProviderRef::new(&tx, 1).basic_account(ADDRESS), Ok(None)); assert_eq!( - HistoricalStateProviderRef::new(&tx, 2).basic_account(ADDRESS), + HistoricalStateProviderRef::new(&tx, 1, static_file_provider.clone()) + .basic_account(ADDRESS) + .clone(), + Ok(None) + ); + assert_eq!( + 
HistoricalStateProviderRef::new(&tx, 2, static_file_provider.clone()) + .basic_account(ADDRESS), Ok(Some(acc_at3)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 3).basic_account(ADDRESS), + HistoricalStateProviderRef::new(&tx, 3, static_file_provider.clone()) + .basic_account(ADDRESS), Ok(Some(acc_at3)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 4).basic_account(ADDRESS), + HistoricalStateProviderRef::new(&tx, 4, static_file_provider.clone()) + .basic_account(ADDRESS), Ok(Some(acc_at7)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 7).basic_account(ADDRESS), + HistoricalStateProviderRef::new(&tx, 7, static_file_provider.clone()) + .basic_account(ADDRESS), Ok(Some(acc_at7)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 9).basic_account(ADDRESS), + HistoricalStateProviderRef::new(&tx, 9, static_file_provider.clone()) + .basic_account(ADDRESS), Ok(Some(acc_at10)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 10).basic_account(ADDRESS), + HistoricalStateProviderRef::new(&tx, 10, static_file_provider.clone()) + .basic_account(ADDRESS), Ok(Some(acc_at10)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 11).basic_account(ADDRESS), + HistoricalStateProviderRef::new(&tx, 11, static_file_provider.clone()) + .basic_account(ADDRESS), Ok(Some(acc_at15)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 16).basic_account(ADDRESS), + HistoricalStateProviderRef::new(&tx, 16, static_file_provider.clone()) + .basic_account(ADDRESS), Ok(Some(acc_plain)) ); - assert_eq!(HistoricalStateProviderRef::new(&tx, 1).basic_account(HIGHER_ADDRESS), Ok(None)); assert_eq!( - HistoricalStateProviderRef::new(&tx, 1000).basic_account(HIGHER_ADDRESS), + HistoricalStateProviderRef::new(&tx, 1, static_file_provider.clone()) + .basic_account(HIGHER_ADDRESS), + Ok(None) + ); + assert_eq!( + HistoricalStateProviderRef::new(&tx, 1000, static_file_provider.clone()) + .basic_account(HIGHER_ADDRESS), Ok(Some(higher_acc_plain)) ); } #[test] fn history_provider_get_storage() { - let db = create_test_rw_db(); - let tx = db.tx_mut().unwrap(); + let factory = create_test_provider_factory(); + let tx = factory.provider_rw().unwrap().into_tx(); + let static_file_provider = factory.static_file_provider(); - tx.put::( + tx.put::( StorageShardedKey { address: ADDRESS, sharded_key: ShardedKey { key: STORAGE, highest_block_number: 7 }, @@ -510,7 +568,7 @@ mod tests { BlockNumberList::new([3, 7]).unwrap(), ) .unwrap(); - tx.put::( + tx.put::( StorageShardedKey { address: ADDRESS, sharded_key: ShardedKey { key: STORAGE, highest_block_number: u64::MAX }, @@ -518,7 +576,7 @@ mod tests { BlockNumberList::new([10, 15]).unwrap(), ) .unwrap(); - tx.put::( + tx.put::( StorageShardedKey { address: HIGHER_ADDRESS, sharded_key: ShardedKey { key: STORAGE, highest_block_number: u64::MAX }, @@ -536,63 +594,77 @@ mod tests { let entry_at3 = StorageEntry { key: STORAGE, value: U256::from(0) }; // setup - tx.put::((3, ADDRESS).into(), entry_at3).unwrap(); - tx.put::((4, HIGHER_ADDRESS).into(), higher_entry_at4).unwrap(); - tx.put::((7, ADDRESS).into(), entry_at7).unwrap(); - tx.put::((10, ADDRESS).into(), entry_at10).unwrap(); - tx.put::((15, ADDRESS).into(), entry_at15).unwrap(); + tx.put::((3, ADDRESS).into(), entry_at3).unwrap(); + tx.put::((4, HIGHER_ADDRESS).into(), higher_entry_at4).unwrap(); + tx.put::((7, ADDRESS).into(), entry_at7).unwrap(); + tx.put::((10, ADDRESS).into(), entry_at10).unwrap(); + tx.put::((15, ADDRESS).into(), entry_at15).unwrap(); // setup plain state tx.put::(ADDRESS, 
entry_plain).unwrap(); tx.put::(HIGHER_ADDRESS, higher_entry_plain).unwrap(); tx.commit().unwrap(); - let tx = db.tx().unwrap(); + let tx = factory.provider().unwrap().into_tx(); // run - assert_eq!(HistoricalStateProviderRef::new(&tx, 0).storage(ADDRESS, STORAGE), Ok(None)); assert_eq!( - HistoricalStateProviderRef::new(&tx, 3).storage(ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 0, static_file_provider.clone()) + .storage(ADDRESS, STORAGE), + Ok(None) + ); + assert_eq!( + HistoricalStateProviderRef::new(&tx, 3, static_file_provider.clone()) + .storage(ADDRESS, STORAGE), Ok(Some(U256::ZERO)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 4).storage(ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 4, static_file_provider.clone()) + .storage(ADDRESS, STORAGE), Ok(Some(entry_at7.value)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 7).storage(ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 7, static_file_provider.clone()) + .storage(ADDRESS, STORAGE), Ok(Some(entry_at7.value)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 9).storage(ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 9, static_file_provider.clone()) + .storage(ADDRESS, STORAGE), Ok(Some(entry_at10.value)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 10).storage(ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 10, static_file_provider.clone()) + .storage(ADDRESS, STORAGE), Ok(Some(entry_at10.value)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 11).storage(ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 11, static_file_provider.clone()) + .storage(ADDRESS, STORAGE), Ok(Some(entry_at15.value)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 16).storage(ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 16, static_file_provider.clone()) + .storage(ADDRESS, STORAGE), Ok(Some(entry_plain.value)) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 1).storage(HIGHER_ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 1, static_file_provider.clone()) + .storage(HIGHER_ADDRESS, STORAGE), Ok(None) ); assert_eq!( - HistoricalStateProviderRef::new(&tx, 1000).storage(HIGHER_ADDRESS, STORAGE), + HistoricalStateProviderRef::new(&tx, 1000, static_file_provider) + .storage(HIGHER_ADDRESS, STORAGE), Ok(Some(higher_entry_plain.value)) ); } #[test] fn history_provider_unavailable() { - let db = create_test_rw_db(); - let tx = db.tx().unwrap(); + let factory = create_test_provider_factory(); + let tx = factory.provider_rw().unwrap().into_tx(); + let static_file_provider = factory.static_file_provider(); // provider block_number < lowest available block number, // i.e. 
state at provider block is pruned @@ -603,6 +675,7 @@ mod tests { account_history_block_number: Some(3), storage_history_block_number: Some(3), }, + static_file_provider.clone(), ); assert_eq!( provider.account_history_lookup(ADDRESS), @@ -622,6 +695,7 @@ mod tests { account_history_block_number: Some(2), storage_history_block_number: Some(2), }, + static_file_provider.clone(), ); assert_eq!(provider.account_history_lookup(ADDRESS), Ok(HistoryInfo::MaybeInPlainState)); assert_eq!( @@ -638,6 +712,7 @@ mod tests { account_history_block_number: Some(1), storage_history_block_number: Some(1), }, + static_file_provider.clone(), ); assert_eq!(provider.account_history_lookup(ADDRESS), Ok(HistoryInfo::MaybeInPlainState)); assert_eq!( diff --git a/crates/storage/provider/src/providers/state/latest.rs b/crates/storage/provider/src/providers/state/latest.rs index 51616bee8c7a..29441f220579 100644 --- a/crates/storage/provider/src/providers/state/latest.rs +++ b/crates/storage/provider/src/providers/state/latest.rs @@ -1,6 +1,6 @@ use crate::{ - providers::state::macros::delegate_provider_impls, AccountReader, BlockHashReader, - BundleStateWithReceipts, StateProvider, StateRootProvider, + providers::{state::macros::delegate_provider_impls, StaticFileProvider}, + AccountReader, BlockHashReader, BundleStateWithReceipts, StateProvider, StateRootProvider, }; use reth_db::{ cursor::{DbCursorRO, DbDupCursorRO}, @@ -9,7 +9,8 @@ use reth_db::{ }; use reth_interfaces::provider::{ProviderError, ProviderResult}; use reth_primitives::{ - trie::AccountProof, Account, Address, BlockNumber, Bytecode, StorageKey, StorageValue, B256, + trie::AccountProof, Account, Address, BlockNumber, Bytecode, StaticFileSegment, StorageKey, + StorageValue, B256, }; use reth_trie::{proof::Proof, updates::TrieUpdates}; @@ -18,12 +19,14 @@ use reth_trie::{proof::Proof, updates::TrieUpdates}; pub struct LatestStateProviderRef<'b, TX: DbTx> { /// database transaction db: &'b TX, + /// Static File provider + static_file_provider: StaticFileProvider, } impl<'b, TX: DbTx> LatestStateProviderRef<'b, TX> { /// Create new state provider - pub fn new(db: &'b TX) -> Self { - Self { db } + pub fn new(db: &'b TX, static_file_provider: StaticFileProvider) -> Self { + Self { db, static_file_provider } } } @@ -37,7 +40,12 @@ impl<'b, TX: DbTx> AccountReader for LatestStateProviderRef<'b, TX> { impl<'b, TX: DbTx> BlockHashReader for LatestStateProviderRef<'b, TX> { /// Get block hash by number. fn block_hash(&self, number: u64) -> ProviderResult> { - self.db.get::(number).map_err(Into::into) + self.static_file_provider.get_with_static_file_or_database( + StaticFileSegment::Headers, + number, + |static_file| static_file.block_hash(number), + || Ok(self.db.get::(number)?), + ) } fn canonical_hashes_range( @@ -45,16 +53,23 @@ impl<'b, TX: DbTx> BlockHashReader for LatestStateProviderRef<'b, TX> { start: BlockNumber, end: BlockNumber, ) -> ProviderResult> { - let range = start..end; - self.db - .cursor_read::() - .map(|mut cursor| { - cursor - .walk_range(range)? - .map(|result| result.map(|(_, hash)| hash).map_err(Into::into)) - .collect::>>() - })? - .map_err(Into::into) + self.static_file_provider.get_range_with_static_file_or_database( + StaticFileSegment::Headers, + start..end, + |static_file, range, _| static_file.canonical_hashes_range(range.start, range.end), + |range, _| { + self.db + .cursor_read::() + .map(|mut cursor| { + cursor + .walk_range(range)? + .map(|result| result.map(|(_, hash)| hash).map_err(Into::into)) + .collect::>>() + })? 
+ .map_err(Into::into) + }, + |_| true, + ) } } @@ -110,18 +125,20 @@ impl<'b, TX: DbTx> StateProvider for LatestStateProviderRef<'b, TX> { pub struct LatestStateProvider { /// database transaction db: TX, + /// Static File provider + static_file_provider: StaticFileProvider, } impl LatestStateProvider { /// Create new state provider - pub fn new(db: TX) -> Self { - Self { db } + pub fn new(db: TX, static_file_provider: StaticFileProvider) -> Self { + Self { db, static_file_provider } } /// Returns a new provider that takes the `TX` as reference #[inline(always)] fn as_ref(&self) -> LatestStateProviderRef<'_, TX> { - LatestStateProviderRef::new(&self.db) + LatestStateProviderRef::new(&self.db, self.static_file_provider.clone()) } } diff --git a/crates/storage/provider/src/providers/snapshot/jar.rs b/crates/storage/provider/src/providers/static_file/jar.rs similarity index 81% rename from crates/storage/provider/src/providers/snapshot/jar.rs rename to crates/storage/provider/src/providers/static_file/jar.rs index ee1519c9f2b8..92bc0bce6fb4 100644 --- a/crates/storage/provider/src/providers/snapshot/jar.rs +++ b/crates/storage/provider/src/providers/static_file/jar.rs @@ -1,58 +1,81 @@ -use super::LoadedJarRef; +use super::{ + metrics::{StaticFileProviderMetrics, StaticFileProviderOperation}, + LoadedJarRef, +}; use crate::{ to_range, BlockHashReader, BlockNumReader, HeaderProvider, ReceiptProvider, TransactionsProvider, }; use reth_db::{ codecs::CompactU256, - snapshot::{HeaderMask, ReceiptMask, SnapshotCursor, TransactionMask}, + static_file::{HeaderMask, ReceiptMask, StaticFileCursor, TransactionMask}, }; use reth_interfaces::provider::{ProviderError, ProviderResult}; use reth_primitives::{ Address, BlockHash, BlockHashOrNumber, BlockNumber, ChainInfo, Header, Receipt, SealedHeader, TransactionMeta, TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, B256, U256, }; -use std::ops::{Deref, RangeBounds}; +use std::{ + ops::{Deref, RangeBounds}, + sync::Arc, +}; /// Provider over a specific `NippyJar` and range. #[derive(Debug)] -pub struct SnapshotJarProvider<'a> { - /// Main snapshot segment +pub struct StaticFileJarProvider<'a> { + /// Main static file segment jar: LoadedJarRef<'a>, - /// Another kind of snapshot segment to help query data from the main one. + /// Another kind of static file segment to help query data from the main one. auxiliar_jar: Option>, + metrics: Option>, } -impl<'a> Deref for SnapshotJarProvider<'a> { +impl<'a> Deref for StaticFileJarProvider<'a> { type Target = LoadedJarRef<'a>; fn deref(&self) -> &Self::Target { &self.jar } } -impl<'a> From> for SnapshotJarProvider<'a> { +impl<'a> From> for StaticFileJarProvider<'a> { fn from(value: LoadedJarRef<'a>) -> Self { - SnapshotJarProvider { jar: value, auxiliar_jar: None } + StaticFileJarProvider { jar: value, auxiliar_jar: None, metrics: None } } } -impl<'a> SnapshotJarProvider<'a> { +impl<'a> StaticFileJarProvider<'a> { /// Provides a cursor for more granular data access. 
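Both the historical and the latest state providers above now route header-related reads through a static-file-or-database helper: serve the request from the static-file tier when the segment already covers the requested number, otherwise fall back to the live database. The closure-based signature below is an illustrative assumption, not reth's actual `get_with_static_file_or_database` API, and the error type is a placeholder.

```rust
/// Hypothetical error type standing in for `ProviderError`.
#[derive(Debug)]
struct NotFound;

/// Serve a lookup from the static-file tier when it covers the requested
/// number, otherwise fall back to the database closure.
fn get_with_static_file_or_database<T>(
    highest_static_file_block: Option<u64>,
    number: u64,
    fetch_from_static_file: impl FnOnce() -> Result<Option<T>, NotFound>,
    fetch_from_database: impl FnOnce() -> Result<Option<T>, NotFound>,
) -> Result<Option<T>, NotFound> {
    if highest_static_file_block.is_some_and(|highest| number <= highest) {
        fetch_from_static_file()
    } else {
        fetch_from_database()
    }
}

fn main() -> Result<(), NotFound> {
    // Blocks up to 499_999 already live in static files; newer ones are still in MDBX.
    let highest = Some(499_999);
    let old = get_with_static_file_or_database(
        highest,
        10,
        || Ok(Some("from static file")),
        || Ok(Some("from db")),
    )?;
    let new = get_with_static_file_or_database(
        highest,
        500_123,
        || Ok(Some("from static file")),
        || Ok(Some("from db")),
    )?;
    assert_eq!(old, Some("from static file"));
    assert_eq!(new, Some("from db"));
    Ok(())
}
```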
- pub fn cursor<'b>(&'b self) -> ProviderResult> + pub fn cursor<'b>(&'b self) -> ProviderResult> where 'b: 'a, { - SnapshotCursor::new(self.value(), self.mmap_handle()) + let result = StaticFileCursor::new(self.value(), self.mmap_handle())?; + + if let Some(metrics) = &self.metrics { + metrics.record_segment_operation( + self.segment(), + StaticFileProviderOperation::InitCursor, + None, + ); + } + + Ok(result) } - /// Adds a new auxiliar snapshot to help query data from the main one - pub fn with_auxiliar(mut self, auxiliar_jar: SnapshotJarProvider<'a>) -> Self { + /// Adds a new auxiliar static file to help query data from the main one + pub fn with_auxiliar(mut self, auxiliar_jar: StaticFileJarProvider<'a>) -> Self { self.auxiliar_jar = Some(Box::new(auxiliar_jar)); self } + + /// Enables metrics on the provider. + pub fn with_metrics(mut self, metrics: Arc) -> Self { + self.metrics = Some(metrics); + self + } } -impl<'a> HeaderProvider for SnapshotJarProvider<'a> { +impl<'a> HeaderProvider for StaticFileJarProvider<'a> { fn header(&self, block_hash: &BlockHash) -> ProviderResult> { Ok(self .cursor()? @@ -124,7 +147,7 @@ impl<'a> HeaderProvider for SnapshotJarProvider<'a> { } } -impl<'a> BlockHashReader for SnapshotJarProvider<'a> { +impl<'a> BlockHashReader for StaticFileJarProvider<'a> { fn block_hash(&self, number: u64) -> ProviderResult> { self.cursor()?.get_one::>(number.into()) } @@ -146,7 +169,7 @@ impl<'a> BlockHashReader for SnapshotJarProvider<'a> { } } -impl<'a> BlockNumReader for SnapshotJarProvider<'a> { +impl<'a> BlockNumReader for StaticFileJarProvider<'a> { fn chain_info(&self) -> ProviderResult { // Information on live database Err(ProviderError::UnsupportedProvider) @@ -167,17 +190,17 @@ impl<'a> BlockNumReader for SnapshotJarProvider<'a> { Ok(cursor .get_one::>((&hash).into())? - .and_then(|res| (res == hash).then(|| cursor.number()))) + .and_then(|res| (res == hash).then(|| cursor.number()).flatten())) } } -impl<'a> TransactionsProvider for SnapshotJarProvider<'a> { +impl<'a> TransactionsProvider for StaticFileJarProvider<'a> { fn transaction_id(&self, hash: TxHash) -> ProviderResult> { let mut cursor = self.cursor()?; Ok(cursor .get_one::>((&hash).into())? - .and_then(|res| (res.hash() == hash).then(|| cursor.number()))) + .and_then(|res| (res.hash() == hash).then(|| cursor.number()).flatten())) } fn transaction_by_id(&self, num: TxNumber) -> ProviderResult> { @@ -205,12 +228,12 @@ impl<'a> TransactionsProvider for SnapshotJarProvider<'a> { &self, _hash: TxHash, ) -> ProviderResult> { - // Information required on indexing table [`tables::TransactionBlock`] + // Information required on indexing table [`tables::TransactionBlocks`] Err(ProviderError::UnsupportedProvider) } fn transaction_block(&self, _id: TxNumber) -> ProviderResult> { - // Information on indexing table [`tables::TransactionBlock`] + // Information on indexing table [`tables::TransactionBlocks`] Err(ProviderError::UnsupportedProvider) } @@ -218,7 +241,7 @@ impl<'a> TransactionsProvider for SnapshotJarProvider<'a> { &self, _block_id: BlockHashOrNumber, ) -> ProviderResult>> { - // Related to indexing tables. Live database should get the tx_range and call snapshot + // Related to indexing tables. Live database should get the tx_range and call static file // provider with `transactions_by_tx_range` instead. 
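The `by_hash` paths above share a defensive pattern: the jar cursor returns a candidate row for the queried hash, and the caller only accepts it after re-checking that the row's own hash matches, since a filter or offset lookup can hand back a candidate even for a hash that was never stored. A toy illustration of that verify-after-lookup step; `lookup` below is a deliberately sloppy stand-in for the cursor, not a real API.

```rust
/// A record addressed both by sequential number and by its hash.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Record {
    number: u64,
    hash: [u8; 4],
    payload: &'static str,
}

/// Stand-in for the filter-backed cursor lookup: it may return a *candidate*
/// record for a hash, and that candidate can be a false positive.
fn lookup<'a>(records: &'a [Record], hash: [u8; 4]) -> Option<&'a Record> {
    // Deliberately sloppy: pick a candidate by the first byte only.
    records.iter().find(|r| r.hash[0] == hash[0])
}

/// Verified by-hash access: accept the candidate only if its full hash matches.
fn record_by_hash<'a>(records: &'a [Record], hash: [u8; 4]) -> Option<&'a Record> {
    lookup(records, hash).filter(|candidate| candidate.hash == hash)
}

fn main() {
    let records = [
        Record { number: 0, hash: [0xaa, 1, 1, 1], payload: "first" },
        Record { number: 1, hash: [0xaa, 2, 2, 2], payload: "second" },
    ];
    // An exact match is accepted.
    assert_eq!(record_by_hash(&records, [0xaa, 1, 1, 1]).map(|r| r.payload), Some("first"));
    // A colliding candidate with a different full hash is rejected.
    assert_eq!(record_by_hash(&records, [0xaa, 9, 9, 9]), None);
}
```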
Err(ProviderError::UnsupportedProvider) } @@ -227,7 +250,7 @@ impl<'a> TransactionsProvider for SnapshotJarProvider<'a> { &self, _range: impl RangeBounds, ) -> ProviderResult>> { - // Related to indexing tables. Live database should get the tx_range and call snapshot + // Related to indexing tables. Live database should get the tx_range and call static file // provider with `transactions_by_tx_range` instead. Err(ProviderError::UnsupportedProvider) } @@ -267,14 +290,14 @@ impl<'a> TransactionsProvider for SnapshotJarProvider<'a> { } } -impl<'a> ReceiptProvider for SnapshotJarProvider<'a> { +impl<'a> ReceiptProvider for StaticFileJarProvider<'a> { fn receipt(&self, num: TxNumber) -> ProviderResult> { self.cursor()?.get_one::>(num.into()) } fn receipt_by_hash(&self, hash: TxHash) -> ProviderResult> { - if let Some(tx_snapshot) = &self.auxiliar_jar { - if let Some(num) = tx_snapshot.transaction_id(hash)? { + if let Some(tx_static_file) = &self.auxiliar_jar { + if let Some(num) = tx_static_file.transaction_id(hash)? { return self.receipt(num) } } @@ -282,7 +305,7 @@ impl<'a> ReceiptProvider for SnapshotJarProvider<'a> { } fn receipts_by_block(&self, _block: BlockHashOrNumber) -> ProviderResult>> { - // Related to indexing tables. Snapshot should get the tx_range and call snapshot + // Related to indexing tables. StaticFile should get the tx_range and call static file // provider with `receipt()` instead for each Err(ProviderError::UnsupportedProvider) } diff --git a/crates/storage/provider/src/providers/static_file/manager.rs b/crates/storage/provider/src/providers/static_file/manager.rs new file mode 100644 index 000000000000..1d5a36bd5f1d --- /dev/null +++ b/crates/storage/provider/src/providers/static_file/manager.rs @@ -0,0 +1,1110 @@ +use super::{ + metrics::StaticFileProviderMetrics, LoadedJar, StaticFileJarProvider, StaticFileProviderRW, + StaticFileProviderRWRefMut, BLOCKS_PER_STATIC_FILE, +}; +use crate::{ + to_range, BlockHashReader, BlockNumReader, BlockReader, BlockSource, HeaderProvider, + ReceiptProvider, StatsReader, TransactionVariant, TransactionsProvider, + TransactionsProviderExt, WithdrawalsProvider, +}; +use dashmap::{mapref::entry::Entry as DashMapEntry, DashMap}; +use parking_lot::RwLock; +use reth_db::{ + codecs::CompactU256, + models::StoredBlockBodyIndices, + static_file::{iter_static_files, HeaderMask, ReceiptMask, StaticFileCursor, TransactionMask}, + table::Table, + tables, +}; +use reth_interfaces::provider::{ProviderError, ProviderResult}; +use reth_nippy_jar::NippyJar; +use reth_primitives::{ + keccak256, + static_file::{find_fixed_range, HighestStaticFiles, SegmentHeader, SegmentRangeInclusive}, + Address, Block, BlockHash, BlockHashOrNumber, BlockNumber, BlockWithSenders, ChainInfo, Header, + Receipt, SealedBlock, SealedBlockWithSenders, SealedHeader, StaticFileSegment, TransactionMeta, + TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, Withdrawal, Withdrawals, B256, + U256, +}; +use std::{ + collections::{hash_map::Entry, BTreeMap, HashMap}, + ops::{Deref, Range, RangeBounds, RangeInclusive}, + path::{Path, PathBuf}, + sync::{mpsc, Arc}, +}; +use tracing::warn; + +/// Alias type for a map that can be queried for block ranges from a transaction +/// segment respectively. It uses `TxNumber` to represent the transaction end of a static file +/// range. +type SegmentRanges = HashMap>; + +/// [`StaticFileProvider`] manages all existing [`StaticFileJarProvider`]. 
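+///
+/// # Example
+///
+/// A minimal usage sketch, assuming `"datadir/static_files"` is a directory that already
+/// contains a headers static file:
+///
+/// ```ignore
+/// use reth_primitives::StaticFileSegment;
+///
+/// let provider = StaticFileProvider::new("datadir/static_files")?;
+/// // The provider implements `HeaderProvider`, so headers can be read directly...
+/// let genesis = provider.header_by_number(0)?;
+/// // ...and the internal index tracks the highest block per segment.
+/// let highest = provider.get_highest_static_file_block(StaticFileSegment::Headers);
+/// ```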
+#[derive(Debug, Default, Clone)] +pub struct StaticFileProvider(pub(crate) Arc); + +impl StaticFileProvider { + /// Creates a new [`StaticFileProvider`]. + pub fn new(path: impl AsRef) -> ProviderResult { + let provider = Self(Arc::new(StaticFileProviderInner::new(path)?)); + provider.initialize_index()?; + Ok(provider) + } +} + +impl Deref for StaticFileProvider { + type Target = StaticFileProviderInner; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +/// [`StaticFileProviderInner`] manages all existing [`StaticFileJarProvider`]. +#[derive(Debug, Default)] +pub struct StaticFileProviderInner { + /// Maintains a map which allows for concurrent access to different `NippyJars`, over different + /// segments and ranges. + map: DashMap<(BlockNumber, StaticFileSegment), LoadedJar>, + /// Max static file block for each segment + static_files_max_block: RwLock>, + /// Available static file block ranges on disk indexed by max transactions. + static_files_tx_index: RwLock, + /// Directory where static_files are located + path: PathBuf, + /// Whether [`StaticFileJarProvider`] loads filters into memory. If not, `by_hash` queries + /// won't be able to be queried directly. + load_filters: bool, + /// Maintains a map of StaticFile writers for each [`StaticFileSegment`] + writers: DashMap, + metrics: Option>, +} + +impl StaticFileProviderInner { + /// Creates a new [`StaticFileProviderInner`]. + fn new(path: impl AsRef) -> ProviderResult { + let provider = Self { + map: Default::default(), + writers: Default::default(), + static_files_max_block: Default::default(), + static_files_tx_index: Default::default(), + path: path.as_ref().to_path_buf(), + load_filters: false, + metrics: None, + }; + + Ok(provider) + } +} + +impl StaticFileProvider { + /// Loads filters into memory when creating a [`StaticFileJarProvider`]. + pub fn with_filters(self) -> Self { + let mut provider = + Arc::try_unwrap(self.0).expect("should be called when initializing only"); + provider.load_filters = true; + Self(Arc::new(provider)) + } + + /// Enables metrics on the [`StaticFileProvider`]. + pub fn with_metrics(self) -> Self { + let mut provider = + Arc::try_unwrap(self.0).expect("should be called when initializing only"); + provider.metrics = Some(Arc::new(StaticFileProviderMetrics::default())); + Self(Arc::new(provider)) + } + + /// Gets the [`StaticFileJarProvider`] of the requested segment and block. + pub fn get_segment_provider_from_block( + &self, + segment: StaticFileSegment, + block: BlockNumber, + path: Option<&Path>, + ) -> ProviderResult> { + self.get_segment_provider( + segment, + || self.get_segment_ranges_from_block(segment, block), + path, + )? + .ok_or_else(|| ProviderError::MissingStaticFileBlock(segment, block)) + } + + /// Gets the [`StaticFileJarProvider`] of the requested segment and transaction. + pub fn get_segment_provider_from_transaction( + &self, + segment: StaticFileSegment, + tx: TxNumber, + path: Option<&Path>, + ) -> ProviderResult> { + self.get_segment_provider( + segment, + || self.get_segment_ranges_from_transaction(segment, tx), + path, + )? + .ok_or_else(|| ProviderError::MissingStaticFileTx(segment, tx)) + } + + /// Gets the [`StaticFileJarProvider`] of the requested segment and block or transaction. + /// + /// `fn_range` should make sure the range goes through `find_fixed_range`. 
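+ ///
+ /// # Example
+ ///
+ /// An illustrative sketch, assuming `provider` is a [`StaticFileProvider`] whose headers
+ /// segment covers `block`:
+ ///
+ /// ```ignore
+ /// use reth_primitives::{static_file::find_fixed_range, StaticFileSegment};
+ ///
+ /// let block = 1_000; // hypothetical block number
+ /// // `fn_range` maps the block to the fixed range of the file expected to contain it.
+ /// let jar = provider.get_segment_provider(
+ ///     StaticFileSegment::Headers,
+ ///     || Some(find_fixed_range(block)),
+ ///     None,
+ /// )?;
+ /// assert!(jar.is_some());
+ /// ```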
+ pub fn get_segment_provider( + &self, + segment: StaticFileSegment, + fn_range: impl Fn() -> Option, + path: Option<&Path>, + ) -> ProviderResult>> { + // If we have a path, then get the block range from its name. + // Otherwise, check `self.available_static_files` + let block_range = match path { + Some(path) => StaticFileSegment::parse_filename( + &path + .file_name() + .ok_or_else(|| { + ProviderError::MissingStaticFilePath(segment, path.to_path_buf()) + })? + .to_string_lossy(), + ) + .and_then(|(parsed_segment, block_range)| { + if parsed_segment == segment { + return Some(block_range) + } + None + }), + None => fn_range(), + }; + + // Return cached `LoadedJar` or insert it for the first time, and then, return it. + if let Some(block_range) = block_range { + return Ok(Some(self.get_or_create_jar_provider(segment, &block_range)?)) + } + + Ok(None) + } + + /// Given a segment and block range it removes the cached provider from the map. + pub fn remove_cached_provider( + &self, + segment: StaticFileSegment, + fixed_block_range_end: BlockNumber, + ) { + self.map.remove(&(fixed_block_range_end, segment)); + } + + /// Given a segment and block range it deletes the jar and all files associated with it. + /// + /// CAUTION: destructive. Deletes files on disk. + pub fn delete_jar( + &self, + segment: StaticFileSegment, + fixed_block_range: SegmentRangeInclusive, + ) -> ProviderResult<()> { + let key = (fixed_block_range.end(), segment); + let jar = if let Some((_, jar)) = self.map.remove(&key) { + jar.jar + } else { + let mut jar = NippyJar::::load( + &self.path.join(segment.filename(&fixed_block_range)), + )?; + if self.load_filters { + jar.load_filters()?; + } + jar + }; + + jar.delete()?; + + let mut segment_max_block = None; + if fixed_block_range.start() > 0 { + segment_max_block = Some(fixed_block_range.start() - 1) + }; + self.update_index(segment, segment_max_block)?; + + Ok(()) + } + + /// Given a segment and block range it returns a cached + /// [`StaticFileJarProvider`]. TODO(joshie): we should check the size and pop N if there's too + /// many. + fn get_or_create_jar_provider( + &self, + segment: StaticFileSegment, + fixed_block_range: &SegmentRangeInclusive, + ) -> ProviderResult> { + let key = (fixed_block_range.end(), segment); + + // Avoid using `entry` directly to avoid a write lock in the common case. + let mut provider: StaticFileJarProvider<'_> = if let Some(jar) = self.map.get(&key) { + jar.into() + } else { + let path = self.path.join(segment.filename(fixed_block_range)); + let mut jar = NippyJar::load(&path)?; + if self.load_filters { + jar.load_filters()?; + } + + self.map.entry(key).insert(LoadedJar::new(jar)?).downgrade().into() + }; + + if let Some(metrics) = &self.metrics { + provider = provider.with_metrics(metrics.clone()); + } + Ok(provider) + } + + /// Gets a static file segment's block range from the provider inner block + /// index. + fn get_segment_ranges_from_block( + &self, + segment: StaticFileSegment, + block: u64, + ) -> Option { + self.static_files_max_block + .read() + .get(&segment) + .filter(|max| **max >= block) + .map(|_| find_fixed_range(block)) + } + + /// Gets a static file segment's fixed block range from the provider inner + /// transaction index. 
+ fn get_segment_ranges_from_transaction( + &self, + segment: StaticFileSegment, + tx: u64, + ) -> Option { + let static_files = self.static_files_tx_index.read(); + let segment_static_files = static_files.get(&segment)?; + + // It's more probable that the request comes from a newer tx height, so we iterate + // the static_files in reverse. + let mut static_files_rev_iter = segment_static_files.iter().rev().peekable(); + + while let Some((tx_end, block_range)) = static_files_rev_iter.next() { + if tx > *tx_end { + // request tx is higher than highest static file tx + return None + } + let tx_start = static_files_rev_iter.peek().map(|(tx_end, _)| *tx_end + 1).unwrap_or(0); + if tx_start <= tx { + return Some(find_fixed_range(block_range.end())) + } + } + None + } + + /// Updates the inner transaction and block indexes alongside the internal cached providers in + /// `self.map`. + /// + /// Any entry higher than `segment_max_block` will be deleted from the previous structures. + /// + /// If `segment_max_block` is None it means there's no static file for this segment. + pub fn update_index( + &self, + segment: StaticFileSegment, + segment_max_block: Option, + ) -> ProviderResult<()> { + let mut max_block = self.static_files_max_block.write(); + let mut tx_index = self.static_files_tx_index.write(); + + match segment_max_block { + Some(segment_max_block) => { + // Update the max block for the segment + max_block.insert(segment, segment_max_block); + let fixed_range = find_fixed_range(segment_max_block); + + let jar = NippyJar::::load( + &self.path.join(segment.filename(&fixed_range)), + )?; + + // Updates the tx index by first removing all entries which have a higher + // block_start than our current static file. + if let Some(tx_range) = jar.user_header().tx_range() { + let tx_end = tx_range.end(); + + // Current block range has the same block start as `fixed_range``, but block end + // might be different if we are still filling this static file. + if let Some(current_block_range) = jar.user_header().block_range().copied() { + // Considering that `update_index` is called when we either append/truncate, + // we are sure that we are handling the latest data + // points. + // + // Here we remove every entry of the index that has a block start higher or + // equal than our current one. This is important in the case + // that we prune a lot of rows resulting in a file (and thus + // a higher block range) deletion. + tx_index + .entry(segment) + .and_modify(|index| { + index.retain(|_, block_range| { + block_range.start() < fixed_range.start() + }); + index.insert(tx_end, current_block_range); + }) + .or_insert_with(|| BTreeMap::from([(tx_end, current_block_range)])); + } + } else if let Some(1) = tx_index.get(&segment).map(|index| index.len()) { + // Only happens if we unwind all the txs/receipts from the first static file. + // Should only happen in test scenarios. + if jar.user_header().expected_block_start() == 0 && + matches!( + segment, + StaticFileSegment::Receipts | StaticFileSegment::Transactions + ) + { + tx_index.remove(&segment); + } + } + + // Update the cached provider. + self.map.insert((fixed_range.end(), segment), LoadedJar::new(jar)?); + + // Delete any cached provider that no longer has an associated jar. 
+ self.map.retain(|(end, seg), _| !(*seg == segment && *end > fixed_range.end())); + } + None => { + tx_index.remove(&segment); + max_block.remove(&segment); + } + }; + + Ok(()) + } + + /// Initializes the inner transaction and block index + pub fn initialize_index(&self) -> ProviderResult<()> { + let mut max_block = self.static_files_max_block.write(); + let mut tx_index = self.static_files_tx_index.write(); + + tx_index.clear(); + + for (segment, ranges) in iter_static_files(&self.path)? { + // Update last block for each segment + if let Some((block_range, _)) = ranges.last() { + max_block.insert(segment, block_range.end()); + } + + // Update tx -> block_range index + for (block_range, tx_range) in ranges { + if let Some(tx_range) = tx_range { + let tx_end = tx_range.end(); + + match tx_index.entry(segment) { + Entry::Occupied(mut index) => { + index.get_mut().insert(tx_end, block_range); + } + Entry::Vacant(index) => { + index.insert(BTreeMap::from([(tx_end, block_range)])); + } + }; + } + } + } + + Ok(()) + } + + /// Gets the highest static file block if it exists for a static file segment. + pub fn get_highest_static_file_block(&self, segment: StaticFileSegment) -> Option { + self.static_files_max_block.read().get(&segment).copied() + } + + /// Gets the highest static file transaction. + pub fn get_highest_static_file_tx(&self, segment: StaticFileSegment) -> Option { + self.static_files_tx_index + .read() + .get(&segment) + .and_then(|index| index.last_key_value().map(|(last_tx, _)| *last_tx)) + } + + /// Gets the highest static file block for all segments. + pub fn get_highest_static_files(&self) -> HighestStaticFiles { + HighestStaticFiles { + headers: self.get_highest_static_file_block(StaticFileSegment::Headers), + receipts: self.get_highest_static_file_block(StaticFileSegment::Receipts), + transactions: self.get_highest_static_file_block(StaticFileSegment::Transactions), + } + } + + /// Iterates through segment static_files in reverse order, executing a function until it + /// returns some object. Useful for finding objects by [`TxHash`] or [`BlockHash`]. + pub fn find_static_file( + &self, + segment: StaticFileSegment, + func: impl Fn(StaticFileJarProvider<'_>) -> ProviderResult>, + ) -> ProviderResult> { + if let Some(highest_block) = self.get_highest_static_file_block(segment) { + let mut range = find_fixed_range(highest_block); + while range.end() > 0 { + if let Some(res) = func(self.get_or_create_jar_provider(segment, &range)?)? { + return Ok(Some(res)) + } + range = SegmentRangeInclusive::new( + range.start().saturating_sub(BLOCKS_PER_STATIC_FILE), + range.end().saturating_sub(BLOCKS_PER_STATIC_FILE), + ); + } + } + + Ok(None) + } + + /// Fetches data within a specified range across multiple static files. + /// + /// This function iteratively retrieves data using `get_fn` for each item in the given range. + /// It continues fetching until the end of the range is reached or the provided `predicate` + /// returns false. 
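+ ///
+ /// # Example
+ ///
+ /// An illustrative sketch, assuming `provider` is a [`StaticFileProvider`] with header
+ /// static files covering blocks `0..100`:
+ ///
+ /// ```ignore
+ /// use reth_db::static_file::HeaderMask;
+ /// use reth_primitives::{Header, StaticFileSegment};
+ ///
+ /// // Collect headers 0..100, stopping early once the predicate returns false.
+ /// let headers = provider.fetch_range_with_predicate(
+ ///     StaticFileSegment::Headers,
+ ///     0..100,
+ ///     |cursor, number| cursor.get_one::<HeaderMask<Header>>(number.into()),
+ ///     |header: &Header| header.number < 50, // early stop at block 50
+ /// )?;
+ /// ```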
+ pub fn fetch_range_with_predicate( + &self, + segment: StaticFileSegment, + range: Range, + mut get_fn: F, + mut predicate: P, + ) -> ProviderResult> + where + F: FnMut(&mut StaticFileCursor<'_>, u64) -> ProviderResult>, + P: FnMut(&T) -> bool, + { + let get_provider = |start: u64| match segment { + StaticFileSegment::Headers => { + self.get_segment_provider_from_block(segment, start, None) + } + StaticFileSegment::Transactions | StaticFileSegment::Receipts => { + self.get_segment_provider_from_transaction(segment, start, None) + } + }; + + let mut result = Vec::with_capacity((range.end - range.start).min(100) as usize); + let mut provider = get_provider(range.start)?; + let mut cursor = provider.cursor()?; + + // advances number in range + 'outer: for number in range { + // The `retrying` flag ensures a single retry attempt per `number`. If `get_fn` fails to + // access data in two different static files, it halts further attempts by returning + // an error, effectively preventing infinite retry loops. + let mut retrying = false; + + // advances static files if `get_fn` returns None + 'inner: loop { + match get_fn(&mut cursor, number)? { + Some(res) => { + if !predicate(&res) { + break 'outer + } + result.push(res); + break 'inner + } + None => { + if retrying { + warn!( + target: "provider::static_file", + ?segment, + ?number, + "Could not find block or tx number on a range request" + ); + + let err = if segment.is_headers() { + ProviderError::MissingStaticFileBlock(segment, number) + } else { + ProviderError::MissingStaticFileTx(segment, number) + }; + return Err(err) + } + provider = get_provider(number)?; + cursor = provider.cursor()?; + retrying = true; + } + } + } + } + + Ok(result) + } + + /// Fetches data within a specified range across multiple static files. + /// + /// Returns an iterator over the data + pub fn fetch_range_iter<'a, T, F>( + &'a self, + segment: StaticFileSegment, + range: Range, + get_fn: F, + ) -> ProviderResult> + 'a> + where + F: Fn(&mut StaticFileCursor<'_>, u64) -> ProviderResult> + 'a, + T: std::fmt::Debug, + { + let get_provider = move |start: u64| match segment { + StaticFileSegment::Headers => { + self.get_segment_provider_from_block(segment, start, None) + } + StaticFileSegment::Transactions | StaticFileSegment::Receipts => { + self.get_segment_provider_from_transaction(segment, start, None) + } + }; + let mut provider = get_provider(range.start)?; + + Ok(range.filter_map(move |number| { + match get_fn(&mut provider.cursor().ok()?, number).transpose() { + Some(result) => Some(result), + None => { + provider = get_provider(number).ok()?; + get_fn(&mut provider.cursor().ok()?, number).transpose() + } + } + })) + } + + /// Returns directory where static_files are located. + pub fn directory(&self) -> &Path { + &self.path + } + + /// Retrieves data from the database or static file, wherever it's available. + /// + /// # Arguments + /// * `segment` - The segment of the static file to check against. + /// * `index_key` - Requested index key, usually a block or transaction number. + /// * `fetch_from_static_file` - A closure that defines how to fetch the data from the static + /// file provider. + /// * `fetch_from_database` - A closure that defines how to fetch the data from the database + /// when the static file doesn't contain the required data or is not available. 
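+ ///
+ /// # Example
+ ///
+ /// An illustrative sketch, assuming `provider` is a [`StaticFileProvider`] and
+ /// `db_provider` is any database-backed [`HeaderProvider`]:
+ ///
+ /// ```ignore
+ /// use reth_primitives::StaticFileSegment;
+ ///
+ /// let num = 42; // hypothetical block number
+ /// let header = provider.get_with_static_file_or_database(
+ ///     StaticFileSegment::Headers,
+ ///     num,
+ ///     // Served from the static file when its highest block covers `num`...
+ ///     |static_file| static_file.header_by_number(num),
+ ///     // ...otherwise fall back to the database.
+ ///     || db_provider.header_by_number(num),
+ /// )?;
+ /// ```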
+ pub fn get_with_static_file_or_database( + &self, + segment: StaticFileSegment, + number: u64, + fetch_from_static_file: FS, + fetch_from_database: FD, + ) -> ProviderResult> + where + FS: Fn(&StaticFileProvider) -> ProviderResult>, + FD: Fn() -> ProviderResult>, + { + // If there is, check the maximum block or transaction number of the segment. + let static_file_upper_bound = match segment { + StaticFileSegment::Headers => self.get_highest_static_file_block(segment), + StaticFileSegment::Transactions | StaticFileSegment::Receipts => { + self.get_highest_static_file_tx(segment) + } + }; + + if static_file_upper_bound + .map_or(false, |static_file_upper_bound| static_file_upper_bound >= number) + { + return fetch_from_static_file(self) + } + fetch_from_database() + } + + /// Gets data within a specified range, potentially spanning different static_files and + /// database. + /// + /// # Arguments + /// * `segment` - The segment of the static file to query. + /// * `block_range` - The range of data to fetch. + /// * `fetch_from_static_file` - A function to fetch data from the static_file. + /// * `fetch_from_database` - A function to fetch data from the database. + /// * `predicate` - A function used to evaluate each item in the fetched data. Fetching is + /// terminated when this function returns false, thereby filtering the data based on the + /// provided condition. + pub fn get_range_with_static_file_or_database( + &self, + segment: StaticFileSegment, + mut block_or_tx_range: Range, + fetch_from_static_file: FS, + mut fetch_from_database: FD, + mut predicate: P, + ) -> ProviderResult> + where + FS: Fn(&StaticFileProvider, Range, &mut P) -> ProviderResult>, + FD: FnMut(Range, P) -> ProviderResult>, + P: FnMut(&T) -> bool, + { + let mut data = Vec::new(); + + // If there is, check the maximum block or transaction number of the segment. + if let Some(static_file_upper_bound) = match segment { + StaticFileSegment::Headers => self.get_highest_static_file_block(segment), + StaticFileSegment::Transactions | StaticFileSegment::Receipts => { + self.get_highest_static_file_tx(segment) + } + } { + if block_or_tx_range.start <= static_file_upper_bound { + let end = block_or_tx_range.end.min(static_file_upper_bound + 1); + data.extend(fetch_from_static_file( + self, + block_or_tx_range.start..end, + &mut predicate, + )?); + block_or_tx_range.start = end; + } + } + + if block_or_tx_range.end > block_or_tx_range.start { + data.extend(fetch_from_database(block_or_tx_range, predicate)?) + } + + Ok(data) + } + + #[cfg(any(test, feature = "test-utils"))] + /// Returns static_files directory + pub fn path(&self) -> &Path { + &self.path + } +} + +/// Helper trait to manage different [`StaticFileProviderRW`] of an `Arc ProviderResult>; + + /// Returns a mutable reference to a [`StaticFileProviderRW`] of the latest + /// [`StaticFileSegment`]. + fn latest_writer( + &self, + segment: StaticFileSegment, + ) -> ProviderResult>; + + /// Commits all changes of all [`StaticFileProviderRW`] of all [`StaticFileSegment`]. 
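+ ///
+ /// # Example
+ ///
+ /// An illustrative sketch, assuming `provider` is a [`StaticFileProvider`] rooted at a
+ /// writable directory:
+ ///
+ /// ```ignore
+ /// use reth_primitives::{Header, StaticFileSegment, B256, U256};
+ ///
+ /// // Append through the per-segment writer, then persist every open writer at once.
+ /// let mut writer = provider.latest_writer(StaticFileSegment::Headers)?;
+ /// // Toy values; real callers pass the next canonical header, its terminal difficulty and hash.
+ /// writer.append_header(Header::default(), U256::ZERO, B256::ZERO)?;
+ /// drop(writer); // release the map entry before committing all writers
+ /// provider.commit()?;
+ /// ```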
+ fn commit(&self) -> ProviderResult<()>; +} + +impl StaticFileWriter for StaticFileProvider { + fn get_writer( + &self, + block: BlockNumber, + segment: StaticFileSegment, + ) -> ProviderResult> { + tracing::trace!(target: "providers::static_file", ?block, ?segment, "Getting static file writer."); + Ok(match self.writers.entry(segment) { + DashMapEntry::Occupied(entry) => entry.into_ref(), + DashMapEntry::Vacant(entry) => { + let writer = StaticFileProviderRW::new( + segment, + block, + Arc::downgrade(&self.0), + self.metrics.clone(), + )?; + entry.insert(writer) + } + }) + } + + fn latest_writer( + &self, + segment: StaticFileSegment, + ) -> ProviderResult> { + self.get_writer(self.get_highest_static_file_block(segment).unwrap_or_default(), segment) + } + + fn commit(&self) -> ProviderResult<()> { + for mut writer in self.writers.iter_mut() { + writer.commit()?; + } + Ok(()) + } +} + +impl HeaderProvider for StaticFileProvider { + fn header(&self, block_hash: &BlockHash) -> ProviderResult> { + self.find_static_file(StaticFileSegment::Headers, |jar_provider| { + Ok(jar_provider + .cursor()? + .get_two::>(block_hash.into())? + .and_then(|(header, hash)| { + if &hash == block_hash { + return Some(header) + } + None + })) + }) + } + + fn header_by_number(&self, num: BlockNumber) -> ProviderResult> { + self.get_segment_provider_from_block(StaticFileSegment::Headers, num, None)? + .header_by_number(num) + } + + fn header_td(&self, block_hash: &BlockHash) -> ProviderResult> { + self.find_static_file(StaticFileSegment::Headers, |jar_provider| { + Ok(jar_provider + .cursor()? + .get_two::>(block_hash.into())? + .and_then(|(td, hash)| (&hash == block_hash).then_some(td.0))) + }) + } + + fn header_td_by_number(&self, num: BlockNumber) -> ProviderResult> { + self.get_segment_provider_from_block(StaticFileSegment::Headers, num, None)? + .header_td_by_number(num) + } + + fn headers_range(&self, range: impl RangeBounds) -> ProviderResult> { + self.fetch_range_with_predicate( + StaticFileSegment::Headers, + to_range(range), + |cursor, number| cursor.get_one::>(number.into()), + |_| true, + ) + } + + fn sealed_header(&self, num: BlockNumber) -> ProviderResult> { + self.get_segment_provider_from_block(StaticFileSegment::Headers, num, None)? + .sealed_header(num) + } + + fn sealed_headers_while( + &self, + range: impl RangeBounds, + predicate: impl FnMut(&SealedHeader) -> bool, + ) -> ProviderResult> { + self.fetch_range_with_predicate( + StaticFileSegment::Headers, + to_range(range), + |cursor, number| { + Ok(cursor + .get_two::>(number.into())? + .map(|(header, hash)| header.seal(hash))) + }, + predicate, + ) + } +} + +impl BlockHashReader for StaticFileProvider { + fn block_hash(&self, num: u64) -> ProviderResult> { + self.get_segment_provider_from_block(StaticFileSegment::Headers, num, None)?.block_hash(num) + } + + fn canonical_hashes_range( + &self, + start: BlockNumber, + end: BlockNumber, + ) -> ProviderResult> { + self.fetch_range_with_predicate( + StaticFileSegment::Headers, + start..end, + |cursor, number| cursor.get_one::>(number.into()), + |_| true, + ) + } +} + +impl ReceiptProvider for StaticFileProvider { + fn receipt(&self, num: TxNumber) -> ProviderResult> { + self.get_segment_provider_from_transaction(StaticFileSegment::Receipts, num, None)? + .receipt(num) + } + + fn receipt_by_hash(&self, hash: TxHash) -> ProviderResult> { + if let Some(num) = self.transaction_id(hash)? 
{ + return self.receipt(num) + } + Ok(None) + } + + fn receipts_by_block(&self, _block: BlockHashOrNumber) -> ProviderResult>> { + unreachable!() + } + + fn receipts_by_tx_range( + &self, + range: impl RangeBounds, + ) -> ProviderResult> { + self.fetch_range_with_predicate( + StaticFileSegment::Receipts, + to_range(range), + |cursor, number| cursor.get_one::>(number.into()), + |_| true, + ) + } +} + +impl TransactionsProviderExt for StaticFileProvider { + fn transaction_hashes_by_range( + &self, + tx_range: Range, + ) -> ProviderResult> { + let tx_range_size = (tx_range.end - tx_range.start) as usize; + + // Transactions are different size, so chunks will not all take the same processing time. If + // chunks are too big, there will be idle threads waiting for work. Choosing an + // arbitrary smaller value to make sure it doesn't happen. + let chunk_size = 100; + + let chunks = (tx_range.start..tx_range.end) + .step_by(chunk_size) + .map(|start| start..std::cmp::min(start + chunk_size as u64, tx_range.end)) + .collect::>>(); + let mut channels = Vec::with_capacity(chunk_size); + + #[inline] + fn calculate_hash( + entry: (TxNumber, TransactionSignedNoHash), + rlp_buf: &mut Vec, + ) -> Result<(B256, TxNumber), Box> { + let (tx_id, tx) = entry; + tx.transaction.encode_with_signature(&tx.signature, rlp_buf, false); + Ok((keccak256(rlp_buf), tx_id)) + } + + for chunk_range in chunks { + let (channel_tx, channel_rx) = mpsc::channel(); + channels.push(channel_rx); + + let manager = self.clone(); + + // Spawn the task onto the global rayon pool + // This task will send the results through the channel after it has calculated + // the hash. + rayon::spawn(move || { + let mut rlp_buf = Vec::with_capacity(128); + let _ = manager.fetch_range_with_predicate( + StaticFileSegment::Transactions, + chunk_range, + |cursor, number| { + Ok(cursor + .get_one::>(number.into())? + .map(|transaction| { + rlp_buf.clear(); + let _ = channel_tx + .send(calculate_hash((number, transaction), &mut rlp_buf)); + })) + }, + |_| true, + ); + }); + } + + let mut tx_list = Vec::with_capacity(tx_range_size); + + // Iterate over channels and append the tx hashes unsorted + for channel in channels { + while let Ok(tx) = channel.recv() { + let (tx_hash, tx_id) = tx.map_err(|boxed| *boxed)?; + tx_list.push((tx_hash, tx_id)); + } + } + + Ok(tx_list) + } +} + +impl TransactionsProvider for StaticFileProvider { + fn transaction_id(&self, tx_hash: TxHash) -> ProviderResult> { + self.find_static_file(StaticFileSegment::Transactions, |jar_provider| { + let mut cursor = jar_provider.cursor()?; + if cursor + .get_one::>((&tx_hash).into())? + .and_then(|tx| (tx.hash() == tx_hash).then_some(tx)) + .is_some() + { + Ok(cursor.number()) + } else { + Ok(None) + } + }) + } + + fn transaction_by_id(&self, num: TxNumber) -> ProviderResult> { + self.get_segment_provider_from_transaction(StaticFileSegment::Transactions, num, None)? + .transaction_by_id(num) + } + + fn transaction_by_id_no_hash( + &self, + num: TxNumber, + ) -> ProviderResult> { + self.get_segment_provider_from_transaction(StaticFileSegment::Transactions, num, None)? + .transaction_by_id_no_hash(num) + } + + fn transaction_by_hash(&self, hash: TxHash) -> ProviderResult> { + self.find_static_file(StaticFileSegment::Transactions, |jar_provider| { + Ok(jar_provider + .cursor()? + .get_one::>((&hash).into())? 
+ .map(|tx| tx.with_hash()) + .and_then(|tx| (tx.hash_ref() == &hash).then_some(tx))) + }) + } + + fn transaction_by_hash_with_meta( + &self, + _hash: TxHash, + ) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn transaction_block(&self, _id: TxNumber) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn transactions_by_block( + &self, + _block_id: BlockHashOrNumber, + ) -> ProviderResult>> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn transactions_by_block_range( + &self, + _range: impl RangeBounds, + ) -> ProviderResult>> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn senders_by_tx_range( + &self, + range: impl RangeBounds, + ) -> ProviderResult> { + let txes = self.transactions_by_tx_range(range)?; + TransactionSignedNoHash::recover_signers(&txes, txes.len()) + .ok_or(ProviderError::SenderRecoveryError) + } + + fn transactions_by_tx_range( + &self, + range: impl RangeBounds, + ) -> ProviderResult> { + self.fetch_range_with_predicate( + StaticFileSegment::Transactions, + to_range(range), + |cursor, number| { + cursor.get_one::>(number.into()) + }, + |_| true, + ) + } + + fn transaction_sender(&self, id: TxNumber) -> ProviderResult> { + Ok(self.transaction_by_id_no_hash(id)?.and_then(|tx| tx.recover_signer())) + } +} + +/* Cannot be successfully implemented but must exist for trait requirements */ + +impl BlockNumReader for StaticFileProvider { + fn chain_info(&self) -> ProviderResult { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn best_block_number(&self) -> ProviderResult { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn last_block_number(&self) -> ProviderResult { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn block_number(&self, _hash: B256) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } +} + +impl BlockReader for StaticFileProvider { + fn find_block_by_hash( + &self, + _hash: B256, + _source: BlockSource, + ) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn block(&self, _id: BlockHashOrNumber) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn pending_block(&self) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn pending_block_with_senders(&self) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn pending_block_and_receipts(&self) -> ProviderResult)>> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn ommers(&self, _id: BlockHashOrNumber) -> ProviderResult>> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn block_body_indices(&self, _num: u64) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn block_with_senders( + &self, + _id: BlockHashOrNumber, + _transaction_kind: TransactionVariant, + ) -> ProviderResult> { + // Required data not present in static_files + 
Err(ProviderError::UnsupportedProvider) + } + + fn block_range(&self, _range: RangeInclusive) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } +} + +impl WithdrawalsProvider for StaticFileProvider { + fn withdrawals_by_block( + &self, + _id: BlockHashOrNumber, + _timestamp: u64, + ) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } + + fn latest_withdrawal(&self) -> ProviderResult> { + // Required data not present in static_files + Err(ProviderError::UnsupportedProvider) + } +} + +impl StatsReader for StaticFileProvider { + fn count_entries(&self) -> ProviderResult { + match T::NAME { + tables::CanonicalHeaders::NAME | + tables::Headers::NAME | + tables::HeaderTerminalDifficulties::NAME => Ok(self + .get_highest_static_file_block(StaticFileSegment::Headers) + .map(|block| block + 1) + .unwrap_or_default() + as usize), + tables::Receipts::NAME => Ok(self + .get_highest_static_file_tx(StaticFileSegment::Receipts) + .map(|receipts| receipts + 1) + .unwrap_or_default() as usize), + tables::Transactions::NAME => Ok(self + .get_highest_static_file_tx(StaticFileSegment::Transactions) + .map(|txs| txs + 1) + .unwrap_or_default() as usize), + _ => Err(ProviderError::UnsupportedProvider), + } + } +} diff --git a/crates/storage/provider/src/providers/static_file/metrics.rs b/crates/storage/provider/src/providers/static_file/metrics.rs new file mode 100644 index 000000000000..497620b64b87 --- /dev/null +++ b/crates/storage/provider/src/providers/static_file/metrics.rs @@ -0,0 +1,90 @@ +use std::{collections::HashMap, time::Duration}; + +use itertools::Itertools; +use metrics::{Counter, Histogram}; +use reth_metrics::Metrics; +use reth_primitives::StaticFileSegment; +use strum::{EnumIter, IntoEnumIterator}; + +/// Metrics for the static file provider. 
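+///
+/// Metrics are opt-in: they are only recorded when the provider is created with
+/// `StaticFileProvider::with_metrics`, e.g. (illustrative sketch, `path` assumed in scope):
+///
+/// ```ignore
+/// // Every segment operation (cursor init, append, prune, commit, ...) is then counted and timed.
+/// let provider = StaticFileProvider::new(path)?.with_metrics();
+/// ```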
+#[derive(Debug)] +pub struct StaticFileProviderMetrics { + segment_operations: HashMap< + (StaticFileSegment, StaticFileProviderOperation), + StaticFileProviderOperationMetrics, + >, +} + +impl Default for StaticFileProviderMetrics { + fn default() -> Self { + Self { + segment_operations: StaticFileSegment::iter() + .cartesian_product(StaticFileProviderOperation::iter()) + .map(|(segment, operation)| { + ( + (segment, operation), + StaticFileProviderOperationMetrics::new_with_labels(&[ + ("segment", segment.as_str()), + ("operation", operation.as_str()), + ]), + ) + }) + .collect(), + } + } +} + +impl StaticFileProviderMetrics { + pub(crate) fn record_segment_operation( + &self, + segment: StaticFileSegment, + operation: StaticFileProviderOperation, + duration: Option, + ) { + self.segment_operations + .get(&(segment, operation)) + .expect("segment operation metrics should exist") + .calls_total + .increment(1); + + if let Some(duration) = duration { + self.segment_operations + .get(&(segment, operation)) + .expect("segment operation metrics should exist") + .write_duration_seconds + .record(duration.as_secs_f64()); + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EnumIter)] +pub(crate) enum StaticFileProviderOperation { + InitCursor, + OpenWriter, + Append, + Prune, + IncrementBlock, + CommitWriter, +} + +impl StaticFileProviderOperation { + const fn as_str(&self) -> &'static str { + match self { + Self::InitCursor => "init-cursor", + Self::OpenWriter => "open-writer", + Self::Append => "append", + Self::Prune => "prune", + Self::IncrementBlock => "increment-block", + Self::CommitWriter => "commit-writer", + } + } +} + +#[derive(Metrics)] +#[metrics(scope = "static_files.jar_provider")] +pub(crate) struct StaticFileProviderOperationMetrics { + /// Total number of static file jar provider operations made. + calls_total: Counter, + /// The time it took to execute the static file jar provider operation that writes data. + write_duration_seconds: Histogram, +} diff --git a/crates/storage/provider/src/providers/snapshot/mod.rs b/crates/storage/provider/src/providers/static_file/mod.rs similarity index 68% rename from crates/storage/provider/src/providers/snapshot/mod.rs rename to crates/storage/provider/src/providers/static_file/mod.rs index 7a9327c291ba..be1db10b15bc 100644 --- a/crates/storage/provider/src/providers/snapshot/mod.rs +++ b/crates/storage/provider/src/providers/static_file/mod.rs @@ -1,18 +1,25 @@ mod manager; -pub use manager::SnapshotProvider; +pub use manager::{StaticFileProvider, StaticFileWriter}; mod jar; -pub use jar::SnapshotJarProvider; +pub use jar::StaticFileJarProvider; + +mod writer; +pub use writer::{StaticFileProviderRW, StaticFileProviderRWRefMut}; + +mod metrics; use reth_interfaces::provider::ProviderResult; use reth_nippy_jar::NippyJar; -use reth_primitives::{snapshot::SegmentHeader, SnapshotSegment}; +use reth_primitives::{static_file::SegmentHeader, StaticFileSegment}; use std::{ops::Deref, sync::Arc}; +const BLOCKS_PER_STATIC_FILE: u64 = 500_000; + /// Alias type for each specific `NippyJar`. -type LoadedJarRef<'a> = dashmap::mapref::one::Ref<'a, (u64, SnapshotSegment), LoadedJar>; +type LoadedJarRef<'a> = dashmap::mapref::one::Ref<'a, (u64, StaticFileSegment), LoadedJar>; -/// Helper type to reuse an associated snapshot mmap handle on created cursors. +/// Helper type to reuse an associated static file mmap handle on created cursors. 
#[derive(Debug)] pub struct LoadedJar { jar: NippyJar, @@ -29,6 +36,10 @@ impl LoadedJar { fn mmap_handle(&self) -> Arc { self.mmap_handle.clone() } + + fn segment(&self) -> StaticFileSegment { + self.jar.user_header().segment() + } } impl Deref for LoadedJar { @@ -45,25 +56,31 @@ mod tests { use rand::seq::SliceRandom; use reth_db::{ cursor::DbCursorRO, - snapshot::create_snapshot_T1_T2_T3, + static_file::create_static_file_T1_T2_T3, transaction::{DbTx, DbTxMut}, - CanonicalHeaders, HeaderNumbers, HeaderTD, Headers, RawTable, + CanonicalHeaders, HeaderNumbers, HeaderTerminalDifficulties, Headers, RawTable, }; use reth_interfaces::test_utils::generators::{self, random_header_range}; - use reth_primitives::{BlockNumber, B256, U256}; + use reth_primitives::{static_file::find_fixed_range, BlockNumber, B256, U256}; #[test] fn test_snap() { // Ranges let row_count = 100u64; let range = 0..=(row_count - 1); - let segment_header = - SegmentHeader::new(range.clone(), range.clone(), SnapshotSegment::Headers); + let segment_header = SegmentHeader::new( + range.clone().into(), + Some(range.clone().into()), + Some(range.clone().into()), + StaticFileSegment::Headers, + ); // Data sources let factory = create_test_provider_factory(); - let snap_path = tempfile::tempdir().unwrap(); - let snap_file = snap_path.path().join(SnapshotSegment::Headers.filename(&range, &range)); + let static_files_path = tempfile::tempdir().unwrap(); + let static_file = static_files_path + .path() + .join(StaticFileSegment::Headers.filename(&find_fixed_range(*range.end()))); // Setup data let mut headers = random_header_range( @@ -81,17 +98,17 @@ mod tests { tx.put::(header.number, hash).unwrap(); tx.put::(header.number, header.clone().unseal()).unwrap(); - tx.put::(header.number, td.into()).unwrap(); + tx.put::(header.number, td.into()).unwrap(); tx.put::(hash, header.number).unwrap(); } provider_rw.commit().unwrap(); - // Create Snapshot + // Create StaticFile { let with_compression = true; let with_filter = true; - let mut nippy_jar = NippyJar::new(3, snap_file.as_path(), segment_header); + let mut nippy_jar = NippyJar::new(3, static_file.as_path(), segment_header); if with_compression { nippy_jar = nippy_jar.with_zstd(false, 0); @@ -115,24 +132,22 @@ mod tests { .unwrap() .map(|row| row.map(|(_key, value)| value.into_value()).map_err(|e| e.into())); - create_snapshot_T1_T2_T3::< + create_static_file_T1_T2_T3::< Headers, - HeaderTD, + HeaderTerminalDifficulties, CanonicalHeaders, BlockNumber, SegmentHeader, - >( - tx, range, None, none_vec, Some(hashes), row_count as usize, &mut nippy_jar - ) + >(tx, range, None, none_vec, Some(hashes), row_count as usize, nippy_jar) .unwrap(); } // Use providers to query Header data and compare if it matches { let db_provider = factory.provider().unwrap(); - let manager = SnapshotProvider::new(snap_path.path()).unwrap().with_filters(); + let manager = StaticFileProvider::new(static_files_path.path()).unwrap().with_filters(); let jar_provider = manager - .get_segment_provider_from_block(SnapshotSegment::Headers, 0, Some(&snap_file)) + .get_segment_provider_from_block(StaticFileSegment::Headers, 0, Some(&static_file)) .unwrap(); assert!(!headers.is_empty()); @@ -148,7 +163,7 @@ mod tests { assert_eq!(header, db_provider.header(&header_hash).unwrap().unwrap()); assert_eq!(header, jar_provider.header(&header_hash).unwrap().unwrap()); - // Compare HeaderTD + // Compare HeaderTerminalDifficulties assert_eq!( db_provider.header_td(&header_hash).unwrap().unwrap(), 
jar_provider.header_td(&header_hash).unwrap().unwrap() diff --git a/crates/storage/provider/src/providers/static_file/writer.rs b/crates/storage/provider/src/providers/static_file/writer.rs new file mode 100644 index 000000000000..b30f81bcd816 --- /dev/null +++ b/crates/storage/provider/src/providers/static_file/writer.rs @@ -0,0 +1,488 @@ +use crate::providers::static_file::metrics::StaticFileProviderOperation; + +use super::{ + manager::StaticFileProviderInner, metrics::StaticFileProviderMetrics, StaticFileProvider, +}; +use dashmap::mapref::one::RefMut; +use reth_codecs::Compact; +use reth_db::codecs::CompactU256; +use reth_interfaces::provider::{ProviderError, ProviderResult}; +use reth_nippy_jar::{NippyJar, NippyJarError, NippyJarWriter}; +use reth_primitives::{ + static_file::{find_fixed_range, SegmentHeader, SegmentRangeInclusive}, + BlockHash, BlockNumber, Header, Receipt, StaticFileSegment, TransactionSignedNoHash, TxNumber, + U256, +}; +use std::{ + path::{Path, PathBuf}, + sync::{Arc, Weak}, + time::Instant, +}; +use tracing::debug; + +/// Mutable reference to a dashmap element of [`StaticFileProviderRW`]. +pub type StaticFileProviderRWRefMut<'a> = RefMut<'a, StaticFileSegment, StaticFileProviderRW>; + +#[derive(Debug)] +/// Extends `StaticFileProvider` with writing capabilities +pub struct StaticFileProviderRW { + /// Reference back to the provider. We need [Weak] here because [StaticFileProviderRW] is + /// stored in a [dashmap::DashMap] inside the parent [StaticFileProvider].which is an [Arc]. + /// If we were to use an [Arc] here, we would create a reference cycle. + reader: Weak, + writer: NippyJarWriter, + data_path: PathBuf, + buf: Vec, + metrics: Option>, +} + +impl StaticFileProviderRW { + /// Creates a new [`StaticFileProviderRW`] for a [`StaticFileSegment`]. + pub fn new( + segment: StaticFileSegment, + block: BlockNumber, + reader: Weak, + metrics: Option>, + ) -> ProviderResult { + let (writer, data_path) = Self::open(segment, block, reader.clone(), metrics.clone())?; + Ok(Self { writer, data_path, buf: Vec::with_capacity(100), reader, metrics }) + } + + fn open( + segment: StaticFileSegment, + block: u64, + reader: Weak, + metrics: Option>, + ) -> ProviderResult<(NippyJarWriter, PathBuf)> { + let start = Instant::now(); + + let static_file_provider = Self::upgrade_provider_to_strong_reference(&reader); + + let block_range = find_fixed_range(block); + let (jar, path) = match static_file_provider.get_segment_provider_from_block( + segment, + block_range.start(), + None, + ) { + Ok(provider) => (NippyJar::load(provider.data_path())?, provider.data_path().into()), + Err(ProviderError::MissingStaticFileBlock(_, _)) => { + let path = static_file_provider.directory().join(segment.filename(&block_range)); + (create_jar(segment, &path, block_range), path) + } + Err(err) => return Err(err), + }; + + let result = match NippyJarWriter::new(jar) { + Ok(writer) => Ok((writer, path)), + Err(NippyJarError::FrozenJar) => { + // This static file has been frozen, so we should + Err(ProviderError::FinalizedStaticFile(segment, block)) + } + Err(e) => Err(e.into()), + }?; + + if let Some(metrics) = &metrics { + metrics.record_segment_operation( + segment, + StaticFileProviderOperation::OpenWriter, + Some(start.elapsed()), + ); + } + + Ok(result) + } + + /// Commits configuration changes to disk and updates the reader index with the new changes. 
+ pub fn commit(&mut self) -> ProviderResult<()> { + let start = Instant::now(); + + // Commits offsets and new user_header to disk + self.writer.commit()?; + + if let Some(metrics) = &self.metrics { + metrics.record_segment_operation( + self.writer.user_header().segment(), + StaticFileProviderOperation::CommitWriter, + Some(start.elapsed()), + ); + } + + debug!( + target: "provider::static_file", + segment = ?self.writer.user_header().segment(), + path = ?self.data_path, + duration = ?start.elapsed(), + "Commit" + ); + + self.update_index()?; + + Ok(()) + } + + /// Updates the `self.reader` internal index. + fn update_index(&self) -> ProviderResult<()> { + // We find the maximum block of the segment by checking this writer's last block. + // + // However if there's no block range (because there's no data), we try to calculate it by + // substracting 1 from the expected block start, resulting on the last block of the + // previous file. + // + // If that expected block start is 0, then it means that there's no actual block data, and + // there's no block data in static files. + let segment_max_block = match self.writer.user_header().block_range() { + Some(block_range) => Some(block_range.end()), + None => { + if self.writer.user_header().expected_block_start() > 0 { + Some(self.writer.user_header().expected_block_start() - 1) + } else { + None + } + } + }; + + self.reader().update_index(self.writer.user_header().segment(), segment_max_block) + } + + /// Allows to increment the [`SegmentHeader`] end block. It will commit the current static file, + /// and create the next one if we are past the end range. + /// + /// Returns the current [`BlockNumber`] as seen in the static file. + pub fn increment_block(&mut self, segment: StaticFileSegment) -> ProviderResult { + let start = Instant::now(); + if let Some(last_block) = self.writer.user_header().block_end() { + // We have finished the previous static file and must freeze it + if last_block == self.writer.user_header().expected_block_end() { + // Commits offsets and new user_header to disk + self.commit()?; + + // Opens the new static file + let (writer, data_path) = + Self::open(segment, last_block + 1, self.reader.clone(), self.metrics.clone())?; + self.writer = writer; + self.data_path = data_path; + + *self.writer.user_header_mut() = + SegmentHeader::new(find_fixed_range(last_block + 1), None, None, segment); + } + } + + let block = self.writer.user_header_mut().increment_block(); + if let Some(metrics) = &self.metrics { + metrics.record_segment_operation( + segment, + StaticFileProviderOperation::IncrementBlock, + Some(start.elapsed()), + ); + } + + Ok(block) + } + + /// Truncates a number of rows from disk. It deletes and loads an older static file if block + /// goes beyond the start of the current block range. + /// + /// **last_block** should be passed only with transaction based segments. + /// + /// # Note + /// Commits to the configuration file at the end. 
+ fn truncate( + &mut self, + segment: StaticFileSegment, + mut num_rows: u64, + last_block: Option, + ) -> ProviderResult<()> { + while num_rows > 0 { + let len = match segment { + StaticFileSegment::Headers => { + self.writer.user_header().block_len().unwrap_or_default() + } + StaticFileSegment::Transactions | StaticFileSegment::Receipts => { + self.writer.user_header().tx_len().unwrap_or_default() + } + }; + + if num_rows >= len { + // If there's more rows to delete than this static file contains, then just + // delete the whole file and go to the next static file + let previous_snap = self.data_path.clone(); + let block_start = self.writer.user_header().expected_block_start(); + + if block_start != 0 { + let (writer, data_path) = Self::open( + segment, + self.writer.user_header().expected_block_start() - 1, + self.reader.clone(), + self.metrics.clone(), + )?; + self.writer = writer; + self.data_path = data_path; + + NippyJar::::load(&previous_snap)?.delete()?; + } else { + // Update `SegmentHeader` + self.writer.user_header_mut().prune(len); + self.writer.prune_rows(len as usize)?; + break + } + + num_rows -= len; + } else { + // Update `SegmentHeader` + self.writer.user_header_mut().prune(num_rows); + + // Truncate data + self.writer.prune_rows(num_rows as usize)?; + num_rows = 0; + } + } + + // Only Transactions and Receipts + if let Some(last_block) = last_block { + let header = self.writer.user_header_mut(); + header.set_block_range(header.expected_block_start(), last_block); + } + + // Commits new changes to disk. + self.commit()?; + + Ok(()) + } + + /// Appends column to static file. + fn append_column(&mut self, column: T) -> ProviderResult<()> { + self.buf.clear(); + column.to_compact(&mut self.buf); + + self.writer.append_column(Some(Ok(&self.buf)))?; + Ok(()) + } + + /// Appends to tx number-based static file. + /// + /// Returns the current [`TxNumber`] as seen in the static file. + fn append_with_tx_number( + &mut self, + segment: StaticFileSegment, + tx_num: TxNumber, + value: V, + ) -> ProviderResult { + debug_assert!(self.writer.user_header().segment() == segment); + + if self.writer.user_header().tx_range().is_none() { + self.writer.user_header_mut().set_tx_range(tx_num, tx_num); + } else { + self.writer.user_header_mut().increment_tx(); + } + + self.append_column(value)?; + + Ok(self.writer.user_header().tx_end().expect("qed")) + } + + /// Appends header to static file. + /// + /// It **CALLS** `increment_block()` since the number of headers is equal to the number of + /// blocks. + /// + /// Returns the current [`BlockNumber`] as seen in the static file. + pub fn append_header( + &mut self, + header: Header, + terminal_difficulty: U256, + hash: BlockHash, + ) -> ProviderResult { + let start = Instant::now(); + + debug_assert!(self.writer.user_header().segment() == StaticFileSegment::Headers); + + let block_number = self.increment_block(StaticFileSegment::Headers)?; + + self.append_column(header)?; + self.append_column(CompactU256::from(terminal_difficulty))?; + self.append_column(hash)?; + + if let Some(metrics) = &self.metrics { + metrics.record_segment_operation( + StaticFileSegment::Headers, + StaticFileProviderOperation::Append, + Some(start.elapsed()), + ); + } + + Ok(block_number) + } + + /// Appends transaction to static file. + /// + /// It **DOES NOT CALL** `increment_block()`, it should be handled elsewhere. There might be + /// empty blocks and this function wouldn't be called. + /// + /// Returns the current [`TxNumber`] as seen in the static file. 
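+ ///
+ /// # Example
+ ///
+ /// An illustrative sketch, assuming `writer` is the Transactions writer (e.g. obtained via
+ /// `latest_writer(StaticFileSegment::Transactions)`) and `tx` is a `TransactionSignedNoHash`
+ /// with the next sequential number `tx_num`:
+ ///
+ /// ```ignore
+ /// use reth_primitives::StaticFileSegment;
+ ///
+ /// // The block count is advanced separately, once per block (even empty ones).
+ /// writer.increment_block(StaticFileSegment::Transactions)?;
+ /// // Append the transaction; the returned value is the highest tx number now in the file.
+ /// let appended = writer.append_transaction(tx_num, tx)?;
+ /// ```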
+ pub fn append_transaction( + &mut self, + tx_num: TxNumber, + tx: TransactionSignedNoHash, + ) -> ProviderResult { + let start = Instant::now(); + + let result = self.append_with_tx_number(StaticFileSegment::Transactions, tx_num, tx)?; + + if let Some(metrics) = &self.metrics { + metrics.record_segment_operation( + StaticFileSegment::Transactions, + StaticFileProviderOperation::Append, + Some(start.elapsed()), + ); + } + + Ok(result) + } + + /// Appends receipt to static file. + /// + /// It **DOES NOT** call `increment_block()`, it should be handled elsewhere. There might be + /// empty blocks and this function wouldn't be called. + /// + /// Returns the current [`TxNumber`] as seen in the static file. + pub fn append_receipt( + &mut self, + tx_num: TxNumber, + receipt: Receipt, + ) -> ProviderResult { + let start = Instant::now(); + + let result = self.append_with_tx_number(StaticFileSegment::Receipts, tx_num, receipt)?; + + if let Some(metrics) = &self.metrics { + metrics.record_segment_operation( + StaticFileSegment::Receipts, + StaticFileProviderOperation::Append, + Some(start.elapsed()), + ); + } + + Ok(result) + } + + /// Removes the last `number` of transactions from static files. + /// + /// # Note + /// Commits to the configuration file at the end. + pub fn prune_transactions( + &mut self, + number: u64, + last_block: BlockNumber, + ) -> ProviderResult<()> { + let start = Instant::now(); + + let segment = StaticFileSegment::Transactions; + debug_assert!(self.writer.user_header().segment() == segment); + + self.truncate(segment, number, Some(last_block))?; + + if let Some(metrics) = &self.metrics { + metrics.record_segment_operation( + StaticFileSegment::Transactions, + StaticFileProviderOperation::Prune, + Some(start.elapsed()), + ); + } + + Ok(()) + } + + /// Prunes `to_delete` number of receipts from static_files. + /// + /// # Note + /// Commits to the configuration file at the end. + pub fn prune_receipts( + &mut self, + to_delete: u64, + last_block: BlockNumber, + ) -> ProviderResult<()> { + let start = Instant::now(); + + let segment = StaticFileSegment::Receipts; + debug_assert!(self.writer.user_header().segment() == segment); + + self.truncate(segment, to_delete, Some(last_block))?; + + if let Some(metrics) = &self.metrics { + metrics.record_segment_operation( + StaticFileSegment::Receipts, + StaticFileProviderOperation::Prune, + Some(start.elapsed()), + ); + } + + Ok(()) + } + + /// Prunes `to_delete` number of headers from static_files. + /// + /// # Note + /// Commits to the configuration file at the end. + pub fn prune_headers(&mut self, to_delete: u64) -> ProviderResult<()> { + let start = Instant::now(); + + let segment = StaticFileSegment::Headers; + debug_assert!(self.writer.user_header().segment() == segment); + + self.truncate(segment, to_delete, None)?; + + if let Some(metrics) = &self.metrics { + metrics.record_segment_operation( + StaticFileSegment::Headers, + StaticFileProviderOperation::Prune, + Some(start.elapsed()), + ); + } + + Ok(()) + } + + fn reader(&self) -> StaticFileProvider { + Self::upgrade_provider_to_strong_reference(&self.reader) + } + + /// Upgrades a weak reference of [`StaticFileProviderInner`] to a strong reference + /// [`StaticFileProvider`]. + /// + /// # Panics + /// + /// Panics if the parent [`StaticFileProvider`] is fully dropped while the child writer is still + /// active. In reality, it's impossible to detach the [`StaticFileProviderRW`] from the + /// [`StaticFileProvider`]. 
+ fn upgrade_provider_to_strong_reference( + provider: &Weak, + ) -> StaticFileProvider { + provider.upgrade().map(StaticFileProvider).expect("StaticFileProvider is dropped") + } + + #[cfg(any(test, feature = "test-utils"))] + /// Helper function to override block range for testing. + pub fn set_block_range(&mut self, block_range: std::ops::RangeInclusive) { + self.writer.user_header_mut().set_block_range(*block_range.start(), *block_range.end()) + } +} + +fn create_jar( + segment: StaticFileSegment, + path: &Path, + expected_block_range: SegmentRangeInclusive, +) -> NippyJar { + let mut jar = NippyJar::new( + segment.columns(), + path, + SegmentHeader::new(expected_block_range, None, None, segment), + ); + + // Transaction and Receipt already have the compression scheme used natively in its encoding. + // (zstd-dictionary) + if segment.is_headers() { + jar = jar.with_lz4(); + } + + jar +} diff --git a/crates/storage/provider/src/test_utils/blocks.rs b/crates/storage/provider/src/test_utils/blocks.rs index 83fa65c87ed9..05c98c787660 100644 --- a/crates/storage/provider/src/test_utils/blocks.rs +++ b/crates/storage/provider/src/test_utils/blocks.rs @@ -26,7 +26,10 @@ pub fn assert_genesis_block(provider: &DatabaseProviderRW, g: assert_eq!(tx.table::().unwrap(), vec![(h, n)]); assert_eq!(tx.table::().unwrap(), vec![(n, h)]); - assert_eq!(tx.table::().unwrap(), vec![(n, g.difficulty.into())]); + assert_eq!( + tx.table::().unwrap(), + vec![(n, g.difficulty.into())] + ); assert_eq!( tx.table::().unwrap(), vec![(0, StoredBlockBodyIndices::default())] @@ -34,23 +37,23 @@ pub fn assert_genesis_block(provider: &DatabaseProviderRW, g: assert_eq!(tx.table::().unwrap(), vec![]); assert_eq!(tx.table::().unwrap(), vec![]); assert_eq!(tx.table::().unwrap(), vec![]); - assert_eq!(tx.table::().unwrap(), vec![]); - assert_eq!(tx.table::().unwrap(), vec![]); + assert_eq!(tx.table::().unwrap(), vec![]); + assert_eq!(tx.table::().unwrap(), vec![]); assert_eq!(tx.table::().unwrap(), vec![]); assert_eq!(tx.table::().unwrap(), vec![]); assert_eq!(tx.table::().unwrap(), vec![]); - assert_eq!(tx.table::().unwrap(), vec![]); - assert_eq!(tx.table::().unwrap(), vec![]); + assert_eq!(tx.table::().unwrap(), vec![]); + assert_eq!(tx.table::().unwrap(), vec![]); // TODO check after this gets done: https://github.com/paradigmxyz/reth/issues/1588 // Bytecodes are not reverted assert_eq!(tx.table::().unwrap(), vec![]); - assert_eq!(tx.table::().unwrap(), vec![]); - assert_eq!(tx.table::().unwrap(), vec![]); - assert_eq!(tx.table::().unwrap(), vec![]); - assert_eq!(tx.table::().unwrap(), vec![]); + assert_eq!(tx.table::().unwrap(), vec![]); + assert_eq!(tx.table::().unwrap(), vec![]); + assert_eq!(tx.table::().unwrap(), vec![]); + assert_eq!(tx.table::().unwrap(), vec![]); assert_eq!(tx.table::().unwrap(), vec![]); assert_eq!(tx.table::().unwrap(), vec![]); - assert_eq!(tx.table::().unwrap(), vec![]); - // SyncStage is not updated in tests + assert_eq!(tx.table::().unwrap(), vec![]); + // StageCheckpoints is not updated in tests } const BLOCK_RLP: [u8; 610] = 
hex!("f9025ff901f7a0c86e8cc0310ae7c531c758678ddbfd16fc51c8cef8cec650b032de9869e8b94fa01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa050554882fbbda2c2fd93fdc466db9946ea262a67f7a76cc169e714f105ab583da00967f09ef1dfed20c0eacfaa94d5cd4002eda3242ac47eae68972d07b106d192a0e3c8b47fbfc94667ef4cceb17e5cc21e3b1eebd442cebb27f07562b33836290db90100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008302000001830f42408238108203e800a00000000000000000000000000000000000000000000000000000000000000000880000000000000000f862f860800a83061a8094095e7baea6a6c7c4c2dfeb977efac326af552d8780801ba072ed817487b84ba367d15d2f039b5fc5f087d0a8882fbdf73e8cb49357e1ce30a0403d800545b8fc544f92ce8124e2255f8c3c6af93f28243a120585d4c4c6a2a3c0"); diff --git a/crates/storage/provider/src/test_utils/mod.rs b/crates/storage/provider/src/test_utils/mod.rs index 0da47c47940b..7857b8c2144a 100644 --- a/crates/storage/provider/src/test_utils/mod.rs +++ b/crates/storage/provider/src/test_utils/mod.rs @@ -1,6 +1,6 @@ use crate::ProviderFactory; use reth_db::{ - test_utils::{create_test_rw_db, TempDatabase}, + test_utils::{create_test_rw_db, create_test_static_files_dir, TempDatabase}, DatabaseEnv, }; use reth_primitives::{ChainSpec, MAINNET}; @@ -27,5 +27,6 @@ pub fn create_test_provider_factory_with_chain_spec( chain_spec: Arc, ) -> ProviderFactory>> { let db = create_test_rw_db(); - ProviderFactory::new(db, chain_spec) + ProviderFactory::new(db, chain_spec, create_test_static_files_dir()) + .expect("create provider factory with static_files") } diff --git a/crates/storage/provider/src/traits/hashing.rs b/crates/storage/provider/src/traits/hashing.rs index b5a77247ed1d..7978a4b19406 100644 --- a/crates/storage/provider/src/traits/hashing.rs +++ b/crates/storage/provider/src/traits/hashing.rs @@ -20,7 +20,7 @@ pub trait HashingWriter: Send + Sync { range: RangeInclusive, ) -> ProviderResult>>; - /// Inserts all accounts into [reth_db::tables::AccountHistory] table. + /// Inserts all accounts into [reth_db::tables::AccountsHistory] table. /// /// # Returns /// diff --git a/crates/storage/provider/src/traits/mod.rs b/crates/storage/provider/src/traits/mod.rs index 1260534784d7..360fe97c06aa 100644 --- a/crates/storage/provider/src/traits/mod.rs +++ b/crates/storage/provider/src/traits/mod.rs @@ -71,3 +71,6 @@ pub use prune_checkpoint::{PruneCheckpointReader, PruneCheckpointWriter}; mod database_provider; pub use database_provider::DatabaseProviderFactory; + +mod stats; +pub use stats::StatsReader; diff --git a/crates/storage/provider/src/traits/stats.rs b/crates/storage/provider/src/traits/stats.rs new file mode 100644 index 000000000000..dece75e287ba --- /dev/null +++ b/crates/storage/provider/src/traits/stats.rs @@ -0,0 +1,10 @@ +use reth_db::table::Table; +use reth_interfaces::provider::ProviderResult; + +/// The trait for fetching provider statistics. +#[auto_impl::auto_impl(&, Arc)] +pub trait StatsReader: Send + Sync { + /// Fetch the number of entries in the corresponding [Table]. 
Depending on the provider, it may + /// route to different data sources other than [Table]. + fn count_entries<T: Table>(&self) -> ProviderResult<usize>; +} diff --git a/crates/transaction-pool/src/pool/best.rs b/crates/transaction-pool/src/pool/best.rs index 74d3e295fb43..5e870de2b81e 100644 --- a/crates/transaction-pool/src/pool/best.rs +++ b/crates/transaction-pool/src/pool/best.rs @@ -85,7 +85,7 @@ pub(crate) struct BestTransactions { /// There might be the case where a yielded transactions is invalid, this will track it. pub(crate) invalid: HashSet, /// Used to receive any new pending transactions that have been added to the pool after this - /// iterator was snapshotted + /// iterator was created /// /// These new pending transactions are inserted into this iterator's pool before yielding the /// next value diff --git a/crates/transaction-pool/src/pool/pending.rs b/crates/transaction-pool/src/pool/pending.rs index 9476f3c6cb1b..90ae13cd6449 100644 --- a/crates/transaction-pool/src/pool/pending.rs +++ b/crates/transaction-pool/src/pool/pending.rs @@ -51,7 +51,7 @@ pub struct PendingPool { /// See also [`PoolTransaction::size`](crate::traits::PoolTransaction::size). size_of: SizeTracker, /// Used to broadcast new transactions that have been added to the PendingPool to existing - /// snapshots of this pool. + /// iterators of this pool. new_transaction_notifier: broadcast::Sender>, } @@ -309,7 +309,7 @@ impl PendingPool { self.update_independents_and_highest_nonces(&tx, &tx_id); self.all.insert(tx.clone()); - // send the new transaction to any existing pendingpool snapshot iterators + // send the new transaction to any existing PendingPool iterators if self.new_transaction_notifier.receiver_count() > 0 { let _ = self.new_transaction_notifier.send(tx.clone()); } diff --git a/crates/transaction-pool/src/test_utils/gen.rs b/crates/transaction-pool/src/test_utils/gen.rs index 020cfd06f96d..614b4b00dfa1 100644 --- a/crates/transaction-pool/src/test_utils/gen.rs +++ b/crates/transaction-pool/src/test_utils/gen.rs @@ -3,7 +3,7 @@ use rand::Rng; use reth_primitives::{ constants::MIN_PROTOCOL_BASE_FEE, sign_message, AccessList, Address, Bytes, FromRecoveredTransaction, Transaction, TransactionKind, TransactionSigned, TxEip1559, - TxEip4844, TxLegacy, TxValue, B256, MAINNET, + TxEip4844, TxLegacy, B256, MAINNET, U256, }; /// A generator for transactions for testing purposes. @@ -129,7 +129,7 @@ pub struct TransactionBuilder { /// The recipient or contract address of the transaction. pub to: TransactionKind, /// The value to be transferred in the transaction. - pub value: TxValue, + pub value: U256, /// The list of addresses and storage keys that the transaction can access. pub access_list: AccessList, /// The input data for the transaction, typically containing function parameters for contract @@ -250,7 +250,7 @@ impl TransactionBuilder { /// Sets the value to be transferred in the transaction. pub fn value(mut self, value: u128) -> Self { - self.value = value.into(); + self.value = U256::from(value); self } @@ -310,7 +310,7 @@ impl TransactionBuilder { /// Sets the value to be transferred in the transaction, mutable reference version.
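With `TxValue` removed, both value setters on this builder now widen the caller's `u128` into the 256-bit value type explicitly. A small standalone sketch of that builder shape, with a toy integer type standing in for `reth_primitives::U256`:

```rust
/// Toy stand-in for the 256-bit integer the builder now stores; only the widening
/// at the setter boundary matters here.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
struct U256Like(u128);

#[derive(Default)]
struct TxBuilder {
    value: U256Like,
}

impl TxBuilder {
    /// Consuming setter, mirroring `TransactionBuilder::value`.
    fn value(mut self, value: u128) -> Self {
        self.value = U256Like(value);
        self
    }

    /// By-reference setter, mirroring `TransactionBuilder::set_value`.
    fn set_value(&mut self, value: u128) -> &mut Self {
        self.value = U256Like(value);
        self
    }
}

fn main() {
    let a = TxBuilder::default().value(1_000);
    let mut b = TxBuilder::default();
    b.set_value(2_000);
    assert_eq!(a.value, U256Like(1_000));
    assert_eq!(b.value, U256Like(2_000));
}
```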
pub fn set_value(&mut self, value: u128) -> &mut Self { - self.value = value.into(); + self.value = U256::from(value); self } diff --git a/crates/transaction-pool/src/test_utils/mock.rs b/crates/transaction-pool/src/test_utils/mock.rs index 904b932f4c09..8fa0673b5308 100644 --- a/crates/transaction-pool/src/test_utils/mock.rs +++ b/crates/transaction-pool/src/test_utils/mock.rs @@ -851,7 +851,7 @@ impl FromRecoveredTransaction for MockTransaction { gas_price, gas_limit, to, - value: value.into(), + value, input, size, }, @@ -873,7 +873,7 @@ impl FromRecoveredTransaction for MockTransaction { max_priority_fee_per_gas, gas_limit, to, - value: value.into(), + value, input, accesslist: access_list, size, @@ -899,7 +899,7 @@ impl FromRecoveredTransaction for MockTransaction { max_fee_per_blob_gas, gas_limit, to, - value: value.into(), + value, input, accesslist: access_list, sidecar: BlobTransactionSidecar::default(), @@ -921,7 +921,7 @@ impl FromRecoveredTransaction for MockTransaction { gas_price, gas_limit, to, - value: value.into(), + value, input, accesslist: access_list, size, @@ -989,7 +989,7 @@ impl From for Transaction { gas_price, gas_limit, to, - value: value.into(), + value, input: input.clone(), }), MockTransaction::Eip1559 { @@ -1011,7 +1011,7 @@ impl From for Transaction { max_fee_per_gas, max_priority_fee_per_gas, to, - value: value.into(), + value, access_list: accesslist.clone(), input: input.clone(), }), @@ -1036,7 +1036,7 @@ impl From for Transaction { max_fee_per_gas, max_priority_fee_per_gas, to, - value: value.into(), + value, access_list: accesslist, blob_versioned_hashes: vec![hash], max_fee_per_blob_gas, @@ -1059,7 +1059,7 @@ impl From for Transaction { gas_price, gas_limit, to, - value: value.into(), + value, access_list: accesslist, input, }), @@ -1101,7 +1101,7 @@ impl proptest::arbitrary::Arbitrary for MockTransaction { gas_price: *gas_price, gas_limit: *gas_limit, to: *to, - value: (*value).into(), + value: (*value), input: (*input).clone(), size: tx.size(), }, @@ -1123,7 +1123,7 @@ impl proptest::arbitrary::Arbitrary for MockTransaction { max_priority_fee_per_gas: *max_priority_fee_per_gas, gas_limit: *gas_limit, to: *to, - value: (*value).into(), + value: (*value), input: (*input).clone(), accesslist: (*access_list).clone(), size: tx.size(), @@ -1148,7 +1148,7 @@ impl proptest::arbitrary::Arbitrary for MockTransaction { max_fee_per_blob_gas: *max_fee_per_blob_gas, gas_limit: *gas_limit, to: *to, - value: (*value).into(), + value: (*value), input: (*input).clone(), accesslist: (*access_list).clone(), // only generate a sidecar if it is a 4844 tx - also for the sake of diff --git a/crates/transaction-pool/src/traits.rs b/crates/transaction-pool/src/traits.rs index f4c26f6fa990..97397c9efbdc 100644 --- a/crates/transaction-pool/src/traits.rs +++ b/crates/transaction-pool/src/traits.rs @@ -928,7 +928,7 @@ impl EthPooledTransaction { #[cfg(feature = "optimism")] Transaction::Deposit(_) => U256::ZERO, }; - let mut cost: U256 = transaction.value().into(); + let mut cost: U256 = transaction.value(); cost += gas_cost; if let Some(blob_tx) = transaction.as_eip4844() { diff --git a/crates/transaction-pool/src/validate/eth.rs b/crates/transaction-pool/src/validate/eth.rs index b8cfa49ca1b9..426b4ca837d3 100644 --- a/crates/transaction-pool/src/validate/eth.rs +++ b/crates/transaction-pool/src/validate/eth.rs @@ -823,7 +823,7 @@ mod tests { from: signer, to: TransactionKind::Create, mint: None, - value: reth_primitives::TxValue::from(U256::ZERO), + value: 
reth_primitives::U256::ZERO, gas_limit: 0u64, is_system_transaction: false, input: Default::default(), diff --git a/crates/trie/Cargo.toml b/crates/trie/Cargo.toml index 280189eada02..a6bb0bbc3224 100644 --- a/crates/trie/Cargo.toml +++ b/crates/trie/Cargo.toml @@ -27,7 +27,7 @@ tracing.workspace = true # misc thiserror.workspace = true -derive_more = "0.99" +derive_more.workspace = true auto_impl = "1" # test-utils diff --git a/crates/trie/src/hashed_cursor/default.rs b/crates/trie/src/hashed_cursor/default.rs index d49feedd1849..298c5ce2e756 100644 --- a/crates/trie/src/hashed_cursor/default.rs +++ b/crates/trie/src/hashed_cursor/default.rs @@ -7,21 +7,21 @@ use reth_db::{ use reth_primitives::{Account, StorageEntry, B256}; impl<'a, TX: DbTx> HashedCursorFactory for &'a TX { - type AccountCursor = ::Cursor; - type StorageCursor = ::DupCursor; + type AccountCursor = ::Cursor; + type StorageCursor = ::DupCursor; fn hashed_account_cursor(&self) -> Result { - self.cursor_read::() + self.cursor_read::() } fn hashed_storage_cursor(&self) -> Result { - self.cursor_dup_read::() + self.cursor_dup_read::() } } impl HashedAccountCursor for C where - C: DbCursorRO, + C: DbCursorRO, { fn seek(&mut self, key: B256) -> Result, reth_db::DatabaseError> { self.seek(key) @@ -34,7 +34,7 @@ where impl HashedStorageCursor for C where - C: DbCursorRO + DbDupCursorRO, + C: DbCursorRO + DbDupCursorRO, { fn is_storage_empty(&mut self, key: B256) -> Result { Ok(self.seek_exact(key)?.is_none()) diff --git a/crates/trie/src/hashed_cursor/post_state.rs b/crates/trie/src/hashed_cursor/post_state.rs index 8f1ec137eafb..7c9e048cb849 100644 --- a/crates/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/src/hashed_cursor/post_state.rs @@ -440,7 +440,7 @@ mod tests { let db = create_test_rw_db(); db.update(|tx| { for (key, account) in accounts.iter() { - tx.put::(*key, *account).unwrap(); + tx.put::(*key, *account).unwrap(); } }) .unwrap(); @@ -460,7 +460,7 @@ mod tests { let db = create_test_rw_db(); db.update(|tx| { for (key, account) in accounts.iter().filter(|x| x.0[31] % 2 == 0) { - tx.put::(*key, *account).unwrap(); + tx.put::(*key, *account).unwrap(); } }) .unwrap(); @@ -487,7 +487,7 @@ mod tests { let db = create_test_rw_db(); db.update(|tx| { for (key, account) in accounts.iter().filter(|x| x.0[31] % 2 == 0) { - tx.put::(*key, *account).unwrap(); + tx.put::(*key, *account).unwrap(); } }) .unwrap(); @@ -517,7 +517,7 @@ mod tests { db.update(|tx| { for (key, _) in accounts.iter() { // insert zero value accounts to the database - tx.put::(*key, Account::default()).unwrap(); + tx.put::(*key, Account::default()).unwrap(); } }) .unwrap(); @@ -539,7 +539,7 @@ mod tests { let db = create_test_rw_db(); db.update(|tx| { for (key, account) in db_accounts.iter() { - tx.put::(*key, *account).unwrap(); + tx.put::(*key, *account).unwrap(); } }) .unwrap(); @@ -586,7 +586,7 @@ mod tests { db.update(|tx| { for (slot, value) in db_storage.iter() { // insert zero value accounts to the database - tx.put::( + tx.put::( address, StorageEntry { key: *slot, value: *value }, ) @@ -664,7 +664,7 @@ mod tests { db.update(|tx| { for (slot, value) in db_storage.iter() { // insert zero value accounts to the database - tx.put::( + tx.put::( address, StorageEntry { key: *slot, value: *value }, ) @@ -703,7 +703,7 @@ mod tests { db.update(|tx| { for (slot, value) in db_storage { // insert zero value accounts to the database - tx.put::(address, StorageEntry { key: slot, value }) + tx.put::(address, StorageEntry { key: slot, value }) .unwrap(); } 
}) @@ -741,7 +741,7 @@ mod tests { db.update(|tx| { for (slot, value) in db_storage { // insert zero value accounts to the database - tx.put::(address, StorageEntry { key: slot, value }) + tx.put::(address, StorageEntry { key: slot, value }) .unwrap(); } }) @@ -773,7 +773,7 @@ mod tests { db.update(|tx| { for (slot, _) in storage.iter() { // insert zero value accounts to the database - tx.put::( + tx.put::( address, StorageEntry { key: *slot, value: U256::ZERO }, ) @@ -811,7 +811,7 @@ mod tests { for (address, storage) in db_storages.iter() { for (slot, value) in storage { let entry = StorageEntry { key: *slot, value: *value }; - tx.put::(*address, entry).unwrap(); + tx.put::(*address, entry).unwrap(); } } }) diff --git a/crates/trie/src/prefix_set/loader.rs b/crates/trie/src/prefix_set/loader.rs index 392c7bef840d..10fbbadd469b 100644 --- a/crates/trie/src/prefix_set/loader.rs +++ b/crates/trie/src/prefix_set/loader.rs @@ -33,7 +33,7 @@ impl<'a, TX: DbTx> PrefixSetLoader<'a, TX> { let mut destroyed_accounts = HashSet::default(); // Walk account changeset and insert account prefixes. - let mut account_changeset_cursor = self.cursor_read::()?; + let mut account_changeset_cursor = self.cursor_read::()?; let mut account_plain_state_cursor = self.cursor_read::()?; for account_entry in account_changeset_cursor.walk_range(range.clone())? { let (_, AccountBeforeTx { address, .. }) = account_entry?; @@ -47,7 +47,7 @@ impl<'a, TX: DbTx> PrefixSetLoader<'a, TX> { // Walk storage changeset and insert storage prefixes as well as account prefixes if missing // from the account prefix set. - let mut storage_cursor = self.cursor_dup_read::()?; + let mut storage_cursor = self.cursor_dup_read::()?; let storage_range = BlockNumberAddress::range(range); for storage_entry in storage_cursor.walk_range(storage_range)? { let (BlockNumberAddress((_, address)), StorageEntry { key, .. }) = storage_entry?; diff --git a/crates/trie/src/state.rs b/crates/trie/src/state.rs index 1aff3b2543f2..8d9aead3d5da 100644 --- a/crates/trie/src/state.rs +++ b/crates/trie/src/state.rs @@ -65,7 +65,7 @@ impl HashedPostState { ) -> Result { // Iterate over account changesets and record value before first occurring account change. let mut accounts = HashMap::>::default(); - let mut account_changesets_cursor = tx.cursor_read::()?; + let mut account_changesets_cursor = tx.cursor_read::()?; for entry in account_changesets_cursor.walk_range(range.clone())? { let (_, AccountBeforeTx { address, info }) = entry?; if let hash_map::Entry::Vacant(entry) = accounts.entry(address) { @@ -75,7 +75,7 @@ impl HashedPostState { // Iterate over storage changesets and record value before first occurring storage change. let mut storages = HashMap::>::default(); - let mut storage_changesets_cursor = tx.cursor_read::()?; + let mut storage_changesets_cursor = tx.cursor_read::()?; for entry in storage_changesets_cursor.walk_range(BlockNumberAddress::range(range))? 
{ let (BlockNumberAddress((_, address)), storage) = entry?; let account_storage = storages.entry(address).or_default(); diff --git a/crates/trie/src/trie.rs b/crates/trie/src/trie.rs index 0a34ccda3cdd..9d51077ba117 100644 --- a/crates/trie/src/trie.rs +++ b/crates/trie/src/trie.rs @@ -498,13 +498,13 @@ mod tests { storage: &BTreeMap, ) { let hashed_address = keccak256(address); - tx.put::(hashed_address, account).unwrap(); + tx.put::(hashed_address, account).unwrap(); insert_storage(tx, hashed_address, storage); } fn insert_storage(tx: &impl DbTxMut, hashed_address: B256, storage: &BTreeMap) { for (k, v) in storage { - tx.put::( + tx.put::( hashed_address, StorageEntry { key: keccak256(k), value: *v }, ) @@ -518,7 +518,7 @@ mod tests { let hashed_address = B256::with_last_byte(1); let mut hashed_storage_cursor = - tx.tx_ref().cursor_dup_write::().unwrap(); + tx.tx_ref().cursor_dup_write::().unwrap(); let data = inputs.iter().map(|x| B256::from_str(x).unwrap()); let value = U256::from(0); for key in data { @@ -581,7 +581,7 @@ mod tests { let factory = create_test_provider_factory(); let tx = factory.provider_rw().unwrap(); for (key, value) in &storage { - tx.tx_ref().put::( + tx.tx_ref().put::( hashed_address, StorageEntry { key: keccak256(key), value: *value }, ) @@ -777,7 +777,7 @@ mod tests { ); let mut hashed_storage_cursor = - tx.tx_ref().cursor_dup_write::().unwrap(); + tx.tx_ref().cursor_dup_write::().unwrap(); for (hashed_slot, value) in storage.clone() { hashed_storage_cursor.upsert(key3, StorageEntry { key: hashed_slot, value }).unwrap(); } @@ -806,9 +806,9 @@ mod tests { let tx = factory.provider_rw().unwrap(); let mut hashed_account_cursor = - tx.tx_ref().cursor_write::().unwrap(); + tx.tx_ref().cursor_write::().unwrap(); let mut hashed_storage_cursor = - tx.tx_ref().cursor_dup_write::().unwrap(); + tx.tx_ref().cursor_dup_write::().unwrap(); let mut hash_builder = HashBuilder::default(); @@ -1002,7 +1002,7 @@ mod tests { { let mut hashed_account_cursor = - tx.tx_ref().cursor_write::().unwrap(); + tx.tx_ref().cursor_write::().unwrap(); let account = hashed_account_cursor.seek_exact(key2).unwrap().unwrap(); hashed_account_cursor.delete_current().unwrap(); @@ -1059,7 +1059,7 @@ mod tests { let tx = factory.provider_rw().unwrap(); { let mut hashed_account_cursor = - tx.tx_ref().cursor_write::().unwrap(); + tx.tx_ref().cursor_write::().unwrap(); let account2 = hashed_account_cursor.seek_exact(key2).unwrap().unwrap(); hashed_account_cursor.delete_current().unwrap(); @@ -1172,7 +1172,7 @@ mod tests { tokio::runtime::Runtime::new().unwrap().block_on(async { let factory = create_test_provider_factory(); let tx = factory.provider_rw().unwrap(); - let mut hashed_account_cursor = tx.tx_ref().cursor_write::().unwrap(); + let mut hashed_account_cursor = tx.tx_ref().cursor_write::().unwrap(); let mut state = BTreeMap::default(); for accounts in account_changes { @@ -1234,7 +1234,7 @@ mod tests { ) -> (B256, HashMap) { let value = U256::from(1); - let mut hashed_storage = tx.tx_ref().cursor_write::().unwrap(); + let mut hashed_storage = tx.tx_ref().cursor_write::().unwrap(); let mut hb = HashBuilder::default().with_updates(true); @@ -1262,7 +1262,7 @@ mod tests { Account { nonce: 0, balance: U256::from(1u64), bytecode_hash: Some(B256::random()) }; let val = encode_account(a, None); - let mut hashed_accounts = tx.tx_ref().cursor_write::().unwrap(); + let mut hashed_accounts = tx.tx_ref().cursor_write::().unwrap(); let mut hb = HashBuilder::default(); for key in [ diff --git a/docs/crates/db.md 
b/docs/crates/db.md index cf0161d2b5c3..b08383b7a6d9 100644 --- a/docs/crates/db.md +++ b/docs/crates/db.md @@ -35,30 +35,30 @@ The `Table` trait has two generic values, `Key` and `Value`, which need to imple There are many tables within the node, all used to store different types of data from `Headers` to `Transactions` and more. Below is a list of all of the tables. You can follow [this link](https://github.com/paradigmxyz/reth/blob/1563506aea09049a85e5cc72c2894f3f7a371581/crates/storage/db/src/tables/mod.rs#L161-L188) if you would like to see the table definitions for any of the tables below. - CanonicalHeaders -- HeaderTD +- HeaderTerminalDifficulties - HeaderNumbers - Headers - BlockBodyIndices - BlockOmmers - BlockWithdrawals -- TransactionBlock +- TransactionBlocks - Transactions -- TxHashNumber +- TransactionHashNumbers - Receipts - PlainAccountState - PlainStorageState - Bytecodes -- AccountHistory -- StorageHistory -- AccountChangeSet -- StorageChangeSet +- AccountsHistory +- StoragesHistory +- AccountChangeSets +- StorageChangeSets - HashedAccount -- HashedStorage +- HashedStorages - AccountsTrie - StoragesTrie -- TxSenders -- SyncStage -- SyncStageProgress +- TransactionSenders +- StageCheckpoints +- StageCheckpointProgresses - PruneCheckpoints
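To make the `Table` abstraction described above concrete, here is a hedged sketch of the idea: a table is a marker type tying a name to typed keys and values, which a read-only transaction can then look up generically. The trait, its bounds, and the `CanonicalHeaders` value type here are simplified stand-ins, not the real reth definitions (which also carry encoding/compression bounds and are generated by a macro).

```rust
use std::collections::BTreeMap;

/// Simplified version of the `Table` idea: a name plus typed `Key` and `Value`.
trait Table {
    const NAME: &'static str;
    type Key: Ord;
    type Value;
}

/// Illustrative table mapping a block number to its canonical hash, analogous to
/// `CanonicalHeaders` in the list above (the value type is simplified).
struct CanonicalHeaders;

impl Table for CanonicalHeaders {
    const NAME: &'static str = "CanonicalHeaders";
    type Key = u64;        // block number
    type Value = [u8; 32]; // block hash
}

/// Minimal typed lookup over one table, standing in for a read-only `get`.
fn get<T: Table>(storage: &BTreeMap<T::Key, T::Value>, key: T::Key) -> Option<&T::Value> {
    storage.get(&key)
}

fn main() {
    let mut headers: BTreeMap<u64, [u8; 32]> = BTreeMap::new();
    headers.insert(0, [0u8; 32]);
    assert!(get::<CanonicalHeaders>(&headers, 0).is_some());
    println!("table {} holds the genesis hash", CanonicalHeaders::NAME);
}
```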
@@ -137,7 +137,6 @@ The `Database` defines two associated types `TX` and `TXMut`. [File: crates/storage/db/src/abstraction/database.rs](https://github.com/paradigmxyz/reth/blob/main/crates/storage/db/src/abstraction/database.rs#L11) - The `TX` type can be any type that implements the `DbTx` trait, which provides a set of functions to interact with read only transactions. [File: crates/storage/db/src/abstraction/transaction.rs](https://github.com/paradigmxyz/reth/blob/main/crates/storage/db/src/abstraction/transaction.rs#L36) @@ -149,7 +148,7 @@ pub trait DbTx: Send + Sync { type Cursor: DbCursorRO + Send + Sync; /// DupCursor type for this read-only transaction type DupCursor: DbDupCursorRO + DbCursorRO + Send + Sync; - + /// Get value fn get(&self, key: T::Key) -> Result, Error>; /// Commit for read only transaction will consume and free transaction and allows diff --git a/docs/crates/stages.md b/docs/crates/stages.md index 1ea64aaab146..8e3de4a044a9 100644 --- a/docs/crates/stages.md +++ b/docs/crates/stages.md @@ -94,10 +94,6 @@ This process continues until all of the headers have been downloaded and written
-## TotalDifficultyStage -* TODO: explain stage -
- ## BodyStage Once the `HeaderStage` completes successfully, the `BodyStage` will start execution. The body stage downloads block bodies for all of the new block headers that were stored locally in the database. The `BodyStage` first determines which block bodies to download by checking if the block body has an ommers hash and transaction root. diff --git a/docs/design/database.md b/docs/design/database.md index 42ec8ba56036..db5da983f51e 100644 --- a/docs/design/database.md +++ b/docs/design/database.md @@ -2,24 +2,24 @@ ## Abstractions -* We created a [Database trait abstraction](https://github.com/paradigmxyz/reth/blob/0d9b9a392d4196793736522f3fc2ac804991b45d/crates/interfaces/src/db/mod.rs) using Rust Stable GATs which frees us from being bound to a single database implementation. We currently use MDBX, but are exploring [redb](https://github.com/cberner/redb) as an alternative. -* We then iterated on [`Transaction`](https://github.com/paradigmxyz/reth/blob/0d9b9a392d4196793736522f3fc2ac804991b45d/crates/stages/src/db.rs#L14-L19) as a non-leaky abstraction with helpers for strictly-typed and unit-tested higher-level database abstractions. +- We created a [Database trait abstraction](https://github.com/paradigmxyz/reth/blob/0d9b9a392d4196793736522f3fc2ac804991b45d/crates/interfaces/src/db/mod.rs) using Rust Stable GATs which frees us from being bound to a single database implementation. We currently use MDBX, but are exploring [redb](https://github.com/cberner/redb) as an alternative. +- We then iterated on [`Transaction`](https://github.com/paradigmxyz/reth/blob/0d9b9a392d4196793736522f3fc2ac804991b45d/crates/stages/src/db.rs#L14-L19) as a non-leaky abstraction with helpers for strictly-typed and unit-tested higher-level database abstractions. ## Codecs -* We want Reth's serialized format to be able to trade off read/write speed for size, depending on who the user is. -* To achieve that, we created the [Encode/Decode/Compress/Decompress traits](https://github.com/paradigmxyz/reth/blob/0d9b9a392d4196793736522f3fc2ac804991b45d/crates/interfaces/src/db/table.rs#L9-L36) to make the (de)serialization of database `Table::Key` and `Table::Values` generic. - * This allows for [out-of-the-box benchmarking](https://github.com/paradigmxyz/reth/blob/0d9b9a392d4196793736522f3fc2ac804991b45d/crates/db/benches/encoding_iai.rs#L5) (using [Criterion](https://github.com/bheisler/criterion.rs) and [Iai](https://github.com/bheisler/iai)) - * It also enables [out-of-the-box fuzzing](https://github.com/paradigmxyz/reth/blob/0d9b9a392d4196793736522f3fc2ac804991b45d/crates/interfaces/src/db/codecs/fuzz/mod.rs) using [trailofbits/test-fuzz](https://github.com/trailofbits/test-fuzz). -* We implemented that trait for the following encoding formats: - * [Ethereum-specific Compact Encoding](https://github.com/paradigmxyz/reth/blob/0d9b9a392d4196793736522f3fc2ac804991b45d/crates/codecs/derive/src/compact/mod.rs): A lot of Ethereum datatypes have unnecessary zeros when serialized, or optional (e.g. on empty hashes) which would be nice not to pay in storage costs. - * [Erigon](https://github.com/ledgerwatch/erigon/blob/12ee33a492f5d240458822d052820d9998653a63/docs/programmers_guide/db_walkthrough.MD) achieves that by having a `bitfield` set on Table "PlainState which adds a bitfield to Accounts. - * Akula expanded it for other tables and datatypes manually. 
It also saved some more space by storing the length of certain types (U256, u64) using the [`modular_bitfield`](https://docs.rs/modular-bitfield/latest/modular_bitfield/) crate, which compacts this information. - * We generalized it for all types, by writing a derive macro that autogenerates code for implementing the trait. It, also generates the interfaces required for fuzzing using ToB/test-fuzz: - * [Scale Encoding](https://github.com/paritytech/parity-scale-codec) - * [Postcard Encoding](https://github.com/jamesmunns/postcard) - * Passthrough (called `no_codec` in the codebase) -* We made implementation of these traits easy via a derive macro called [`main_codec`](https://github.com/paradigmxyz/reth/blob/0d9b9a392d4196793736522f3fc2ac804991b45d/crates/codecs/derive/src/lib.rs#L15) that delegates to one of Compact (default), Scale, Postcard or Passthrough encoding. This is [derived on every struct we need](https://github.com/search?q=repo%3Aparadigmxyz%2Freth%20%22%23%5Bmain_codec%5D%22&type=code), and lets us experiment with different encoding formats without having to modify the entire codebase each time. +- We want Reth's serialized format to be able to trade off read/write speed for size, depending on who the user is. +- To achieve that, we created the [Encode/Decode/Compress/Decompress traits](https://github.com/paradigmxyz/reth/blob/0d9b9a392d4196793736522f3fc2ac804991b45d/crates/interfaces/src/db/table.rs#L9-L36) to make the (de)serialization of database `Table::Key` and `Table::Values` generic. + - This allows for [out-of-the-box benchmarking](https://github.com/paradigmxyz/reth/blob/0d9b9a392d4196793736522f3fc2ac804991b45d/crates/db/benches/encoding_iai.rs#L5) (using [Criterion](https://github.com/bheisler/criterion.rs) and [Iai](https://github.com/bheisler/iai)) + - It also enables [out-of-the-box fuzzing](https://github.com/paradigmxyz/reth/blob/0d9b9a392d4196793736522f3fc2ac804991b45d/crates/interfaces/src/db/codecs/fuzz/mod.rs) using [trailofbits/test-fuzz](https://github.com/trailofbits/test-fuzz). +- We implemented that trait for the following encoding formats: + - [Ethereum-specific Compact Encoding](https://github.com/paradigmxyz/reth/blob/0d9b9a392d4196793736522f3fc2ac804991b45d/crates/codecs/derive/src/compact/mod.rs): A lot of Ethereum datatypes have unnecessary zeros when serialized, or optional (e.g. on empty hashes) which would be nice not to pay in storage costs. + - [Erigon](https://github.com/ledgerwatch/erigon/blob/12ee33a492f5d240458822d052820d9998653a63/docs/programmers_guide/db_walkthrough.MD) achieves that by having a `bitfield` set on Table "PlainState which adds a bitfield to Accounts. + - Akula expanded it for other tables and datatypes manually. It also saved some more space by storing the length of certain types (U256, u64) using the [`modular_bitfield`](https://docs.rs/modular-bitfield/latest/modular_bitfield/) crate, which compacts this information. + - We generalized it for all types, by writing a derive macro that autogenerates code for implementing the trait. 
It, also generates the interfaces required for fuzzing using ToB/test-fuzz: + - [Scale Encoding](https://github.com/paritytech/parity-scale-codec) + - [Postcard Encoding](https://github.com/jamesmunns/postcard) + - Passthrough (called `no_codec` in the codebase) +- We made implementation of these traits easy via a derive macro called [`main_codec`](https://github.com/paradigmxyz/reth/blob/0d9b9a392d4196793736522f3fc2ac804991b45d/crates/codecs/derive/src/lib.rs#L15) that delegates to one of Compact (default), Scale, Postcard or Passthrough encoding. This is [derived on every struct we need](https://github.com/search?q=repo%3Aparadigmxyz%2Freth%20%22%23%5Bmain_codec%5D%22&type=code), and lets us experiment with different encoding formats without having to modify the entire codebase each time. ### Table layout @@ -58,11 +58,11 @@ Transactions { u64 TxNumber "PK" TransactionSignedNoHash Data } -TxHashNumber { +TransactionHashNumbers { B256 TxHash "PK" u64 TxNumber } -TransactionBlock { +TransactionBlocks { u64 MaxTxNumber "PK" u64 BlockNumber } @@ -83,31 +83,31 @@ PlainStorageState { B256 StorageKey "PK" U256 StorageValue } -AccountHistory { +AccountsHistory { B256 Account "PK" BlockNumberList BlockNumberList "List of transitions where account was changed" } -StorageHistory { +StoragesHistory { B256 Account "PK" B256 StorageKey "PK" BlockNumberList BlockNumberList "List of transitions where account storage entry was changed" } -AccountChangeSet { +AccountChangeSets { u64 BlockNumber "PK" B256 Account "PK" - ChangeSet AccountChangeSet "Account before transition" + ChangeSet AccountChangeSets "Account before transition" } -StorageChangeSet { +StorageChangeSets { u64 BlockNumber "PK" B256 Account "PK" B256 StorageKey "PK" - ChangeSet StorageChangeSet "Storage entry before transition" + ChangeSet StorageChangeSets "Storage entry before transition" } -HashedAccount { +HashedAccounts { B256 HashedAddress "PK" Account Data } -HashedStorage { +HashedStorages { B256 HashedAddress "PK" B256 HashedStorageKey "PK" U256 StorageValue @@ -121,17 +121,17 @@ StoragesTrie { StoredNibblesSubKey NibblesSubKey "PK" StorageTrieEntry Node } -TxSenders { +TransactionSenders { u64 TxNumber "PK" Address Sender } -TxHashNumber ||--|| Transactions : "hash -> tx id" -TransactionBlock ||--|{ Transactions : "tx id -> block number" +TransactionHashNumbers ||--|| Transactions : "hash -> tx id" +TransactionBlocks ||--|{ Transactions : "tx id -> block number" BlockBodyIndices ||--o{ Transactions : "block number -> tx ids" -Headers ||--o{ AccountChangeSet : "each block has zero or more changesets" -Headers ||--o{ StorageChangeSet : "each block has zero or more changesets" -AccountHistory }|--|{ AccountChangeSet : index -StorageHistory }|--|{ StorageChangeSet : index +Headers ||--o{ AccountChangeSets : "each block has zero or more changesets" +Headers ||--o{ StorageChangeSets : "each block has zero or more changesets" +AccountsHistory }|--|{ AccountChangeSets : index +StoragesHistory }|--|{ StorageChangeSets : index Headers ||--o| BlockOmmers : "each block has 0 or more ommers" BlockBodyIndices ||--|| Headers : "index" HeaderNumbers |o--|| Headers : "block hash -> block number" @@ -139,8 +139,8 @@ CanonicalHeaders |o--|| Headers : "canonical chain block number -> block hash" Transactions ||--|| Receipts : "each tx has a receipt" PlainAccountState }o--o| Bytecodes : "an account can have a bytecode" PlainAccountState ||--o{ PlainStorageState : "an account has 0 or more storage slots" -Transactions ||--|| TxSenders : "a tx has 
exactly 1 sender" +Transactions ||--|| TransactionSenders : "a tx has exactly 1 sender" -PlainAccountState ||--|| HashedAccount : "hashed representation" -PlainStorageState ||--|| HashedStorage : "hashed representation" +PlainAccountState ||--|| HashedAccounts : "hashed representation" +PlainStorageState ||--|| HashedStorages : "hashed representation" ``` diff --git a/etc/grafana/dashboards/overview.json b/etc/grafana/dashboards/overview.json index b496b694d7f5..85ebb52c4be1 100644 --- a/etc/grafana/dashboards/overview.json +++ b/etc/grafana/dashboards/overview.json @@ -687,7 +687,6 @@ "MerkleUnwind": 5, "SenderRecovery": 3, "StorageHashing": 7, - "TotalDifficulty": 1, "TransactionLookup": 9 } } diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 92bb0f1f1ea8..4b50d33573ca 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -27,6 +27,8 @@ futures.workspace = true async-trait.workspace = true tokio.workspace = true +jemallocator = { version = "0.5.0", features = ["profiling"] } + [[example]] name = "db-access" path = "db-access.rs" @@ -38,3 +40,4 @@ path = "network.rs" [[example]] name = "network-txpool" path = "network-txpool.rs" + diff --git a/examples/db-access.rs b/examples/db-access.rs index 228834838462..6edfc6afe166 100644 --- a/examples/db-access.rs +++ b/examples/db-access.rs @@ -18,12 +18,14 @@ fn main() -> eyre::Result<()> { // Opens a RO handle to the database file. // TODO: Should be able to do `ProviderFactory::new_with_db_path_ro(...)` instead of // doing in 2 steps. - let db = open_db_read_only(Path::new(&std::env::var("RETH_DB_PATH")?), Default::default())?; + let db_path = std::env::var("RETH_DB_PATH")?; + let db_path = Path::new(&db_path); + let db = open_db_read_only(db_path.join("db").as_path(), Default::default())?; // Instantiate a provider factory for Ethereum mainnet using the provided DB. // TODO: Should the DB version include the spec so that you do not need to specify it here? let spec = ChainSpecBuilder::mainnet().build(); - let factory = ProviderFactory::new(db, spec.into()); + let factory = ProviderFactory::new(db, spec.into(), db_path.join("static_files"))?; // This call opens a RO transaction on the database. To write to the DB you'd need to call // the `provider_rw` function and look for the `Writer` variants of the traits. diff --git a/examples/polygon-p2p/src/chain_cfg.rs b/examples/polygon-p2p/src/chain_cfg.rs index 034e5b482dba..5a1fadb53423 100644 --- a/examples/polygon-p2p/src/chain_cfg.rs +++ b/examples/polygon-p2p/src/chain_cfg.rs @@ -27,7 +27,6 @@ pub(crate) fn polygon_chain_spec() -> Arc { ]), deposit_contract: None, base_fee_params: reth_primitives::BaseFeeParamsKind::Constant(BaseFeeParams::ethereum()), - snapshot_block_interval: 500_000, prune_delete_limit: 0, } .into() diff --git a/examples/rpc-db/src/main.rs b/examples/rpc-db/src/main.rs index 79b801e02bb0..13fbc6223728 100644 --- a/examples/rpc-db/src/main.rs +++ b/examples/rpc-db/src/main.rs @@ -36,12 +36,11 @@ pub mod myrpc_ext; #[tokio::main] async fn main() -> eyre::Result<()> { // 1. 
Setup the DB - let db = Arc::new(open_db_read_only( - Path::new(&std::env::var("RETH_DB_PATH")?), - Default::default(), - )?); + let db_path = std::env::var("RETH_DB_PATH")?; + let db_path = Path::new(&db_path); + let db = Arc::new(open_db_read_only(db_path.join("db").as_path(), Default::default())?); let spec = Arc::new(ChainSpecBuilder::mainnet().build()); - let factory = ProviderFactory::new(db.clone(), spec.clone()); + let factory = ProviderFactory::new(db.clone(), spec.clone(), db_path.join("static_files"))?; // 2. Setup the blockchain provider using only the database provider and a noop for the tree to // satisfy trait bounds. Tree is not used in this example since we are only operating on the diff --git a/testing/ef-tests/Cargo.toml b/testing/ef-tests/Cargo.toml index 35480d7e8bb8..c418863777f9 100644 --- a/testing/ef-tests/Cargo.toml +++ b/testing/ef-tests/Cargo.toml @@ -17,7 +17,7 @@ ef-tests = [] [dependencies] reth-primitives.workspace = true reth-db = { workspace = true, features = ["mdbx", "test-utils"] } -reth-provider.workspace = true +reth-provider = { workspace = true, features = ["test-utils"] } reth-stages.workspace = true reth-interfaces.workspace = true reth-revm.workspace = true @@ -29,3 +29,4 @@ walkdir = "2.3.3" serde = "1.0.163" serde_json.workspace = true thiserror.workspace = true +rayon.workspace = true diff --git a/testing/ef-tests/src/cases/blockchain_test.rs b/testing/ef-tests/src/cases/blockchain_test.rs index f2a894cc3fba..3af21076cd7d 100644 --- a/testing/ef-tests/src/cases/blockchain_test.rs +++ b/testing/ef-tests/src/cases/blockchain_test.rs @@ -5,10 +5,11 @@ use crate::{ Case, Error, Suite, }; use alloy_rlp::Decodable; -use reth_db::test_utils::create_test_rw_db; +use rayon::iter::{ParallelBridge, ParallelIterator}; +use reth_db::test_utils::{create_test_rw_db, create_test_static_files_dir}; use reth_node_ethereum::EthEvmConfig; -use reth_primitives::{BlockBody, SealedBlock}; -use reth_provider::{BlockWriter, HashingWriter, ProviderFactory}; +use reth_primitives::{BlockBody, SealedBlock, StaticFileSegment}; +use reth_provider::{providers::StaticFileWriter, HashingWriter, ProviderFactory}; use reth_stages::{stages::ExecutionStage, ExecInput, Stage}; use std::{collections::BTreeMap, fs, path::Path, sync::Arc}; @@ -64,83 +65,107 @@ impl Case for BlockchainTestCase { } // Iterate through test cases, filtering by the network type to exclude specific forks. - for case in self.tests.values().filter(|case| { - !matches!( - case.network, - ForkSpec::ByzantiumToConstantinopleAt5 | - ForkSpec::Constantinople | - ForkSpec::ConstantinopleFix | - ForkSpec::MergeEOF | - ForkSpec::MergeMeterInitCode | - ForkSpec::MergePush0 | - ForkSpec::Unknown - ) - }) { - // Create a new test database and initialize a provider for the test case. - let db = create_test_rw_db(); - let provider = ProviderFactory::new(db.as_ref(), Arc::new(case.network.clone().into())) + self.tests + .values() + .filter(|case| { + !matches!( + case.network, + ForkSpec::ByzantiumToConstantinopleAt5 | + ForkSpec::Constantinople | + ForkSpec::ConstantinopleFix | + ForkSpec::MergeEOF | + ForkSpec::MergeMeterInitCode | + ForkSpec::MergePush0 | + ForkSpec::Unknown + ) + }) + .par_bridge() + .try_for_each(|case| { + // Create a new test database and initialize a provider for the test case. + let db = create_test_rw_db(); + let static_files_dir = create_test_static_files_dir(); + let provider = ProviderFactory::new( + db.as_ref(), + Arc::new(case.network.clone().into()), + static_files_dir.clone(), + )? 
.provider_rw() .unwrap(); - // Insert initial test state into the provider. - provider - .insert_block( - SealedBlock::new( - case.genesis_block_header.clone().into(), - BlockBody::default(), - ) - .try_seal_with_senders() - .unwrap(), - None, - ) - .map_err(|err| Error::RethError(err.into()))?; - case.pre.write_to_db(provider.tx_ref())?; - - // Decode and insert blocks, creating a chain of blocks for the test case. - let last_block = case.blocks.iter().try_fold(None, |_, block| { - let decoded = SealedBlock::decode(&mut block.rlp.as_ref())?; + // Insert initial test state into the provider. provider - .insert_block(decoded.clone().try_seal_with_senders().unwrap(), None) + .insert_historical_block( + SealedBlock::new( + case.genesis_block_header.clone().into(), + BlockBody::default(), + ) + .try_seal_with_senders() + .unwrap(), + None, + ) .map_err(|err| Error::RethError(err.into()))?; - Ok::, Error>(Some(decoded)) - })?; + case.pre.write_to_db(provider.tx_ref())?; - // Execute the execution stage using the EVM processor factory for the test case - // network. - let _ = ExecutionStage::new_with_factory(reth_revm::EvmProcessorFactory::new( - Arc::new(case.network.clone().into()), - EthEvmConfig::default(), - )) - .execute( - &provider, - ExecInput { target: last_block.as_ref().map(|b| b.number), checkpoint: None }, - ); - - // Validate the post-state for the test case. - match (&case.post_state, &case.post_state_hash) { - (Some(state), None) => { - // Validate accounts in the state against the provider's database. - for (&address, account) in state.iter() { - account.assert_db(address, provider.tx_ref())?; - } - } - (None, Some(expected_state_root)) => { - // Insert state hashes into the provider based on the expected state root. - let last_block = last_block.unwrap_or_default(); + // Decode and insert blocks, creating a chain of blocks for the test case. + let last_block = case.blocks.iter().try_fold(None, |_, block| { + let decoded = SealedBlock::decode(&mut block.rlp.as_ref())?; provider - .insert_hashes( - 0..=last_block.number, - last_block.hash(), - *expected_state_root, + .insert_historical_block( + decoded.clone().try_seal_with_senders().unwrap(), + None, ) .map_err(|err| Error::RethError(err.into()))?; + Ok::, Error>(Some(decoded)) + })?; + provider + .static_file_provider() + .latest_writer(StaticFileSegment::Headers) + .unwrap() + .commit() + .unwrap(); + + // Execute the execution stage using the EVM processor factory for the test case + // network. + let _ = ExecutionStage::new_with_factory(reth_revm::EvmProcessorFactory::new( + Arc::new(case.network.clone().into()), + EthEvmConfig::default(), + )) + .execute( + &provider, + ExecInput { target: last_block.as_ref().map(|b| b.number), checkpoint: None }, + ); + + // Validate the post-state for the test case. + match (&case.post_state, &case.post_state_hash) { + (Some(state), None) => { + // Validate accounts in the state against the provider's database. + for (&address, account) in state.iter() { + account.assert_db(address, provider.tx_ref())?; + } + } + (None, Some(expected_state_root)) => { + // Insert state hashes into the provider based on the expected state root. + let last_block = last_block.unwrap_or_default(); + provider + .insert_hashes( + 0..=last_block.number, + last_block.hash(), + *expected_state_root, + ) + .map_err(|err| Error::RethError(err.into()))?; + } + _ => return Err(Error::MissingPostState), } - _ => return Err(Error::MissingPostState), - } - // Drop the provider without committing to the database. 
- drop(provider); - } + // Drop the provider without committing to the database. + drop(provider); + // TODO: replace with `tempdir` usage, so the temp directory is removed + // automatically when the variable goes out of scope + reth_primitives::fs::remove_dir_all(static_files_dir) + .expect("Failed to remove static files directory"); + + Ok(()) + })?; Ok(()) } diff --git a/testing/ef-tests/src/models.rs b/testing/ef-tests/src/models.rs index 992cd40a1b1e..2dddc540c456 100644 --- a/testing/ef-tests/src/models.rs +++ b/testing/ef-tests/src/models.rs @@ -161,7 +161,7 @@ impl State { bytecode_hash: code_hash, }; tx.put::(address, reth_account)?; - tx.put::(hashed_address, reth_account)?; + tx.put::(hashed_address, reth_account)?; if let Some(code_hash) = code_hash { tx.put::(code_hash, Bytecode::new_raw(account.code.clone()))?; } @@ -171,7 +171,7 @@ impl State { address, StorageEntry { key: storage_key, value: *v }, )?; - tx.put::( + tx.put::( hashed_address, StorageEntry { key: keccak256(storage_key), value: *v }, )
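The test-runner change above replaces a serial loop with rayon's `par_bridge()` plus `try_for_each`, so every blockchain test case runs on its own worker and the first error aborts the run. Below is a minimal sketch of just that control-flow skeleton, with illustrative case data standing in for the real per-case database and static-files setup:

```rust
use rayon::iter::{ParallelBridge, ParallelIterator};
use std::collections::BTreeMap;

/// Run all cases in parallel; the first error short-circuits the whole run.
/// The "cases" and the error type here are illustrative stand-ins.
fn run_all(cases: &BTreeMap<String, u64>) -> Result<(), String> {
    cases
        .values()
        // Skip cases we do not support, mirroring the fork filter in the real code.
        .filter(|target| **target != 0)
        .par_bridge()
        .try_for_each(|target| {
            // Per-case setup (database, static files dir) would happen here, since
            // every worker needs its own isolated resources.
            if *target > 100 {
                return Err(format!("case target {target} out of range"));
            }
            Ok(())
        })
}

fn main() {
    let cases = BTreeMap::from([("a".to_string(), 1), ("b".to_string(), 42)]);
    assert!(run_all(&cases).is_ok());
}
```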