From 79ed8cad352a75f274a4c49fcac47af44c74a52a Mon Sep 17 00:00:00 2001 From: Mark Holt <135143369+mh0lt@users.noreply.github.com> Date: Wed, 27 Dec 2023 22:05:09 +0000 Subject: [PATCH] E2 snapshot uploading (#9056) This change introduces additional processes to manage snapshot uploading for E2 snapshots: ## erigon snapshots upload The `snapshots uploader` command starts a version of erigon customized for uploading snapshot files to a remote location. It breaks the stage execution process after the senders stage and then uses the snapshot stage to send uploaded headers, bodies and (in the case of polygon) bor spans and events to snapshot files. Because this process avoids execution, it runs significantly faster than a standard erigon configuration. The uploader uses rclone to send seedable snapshot files (100K or 500K blocks) to a remote storage location specified in the rclone config file. The **uploader** is configured to minimize disk usage by doing the following: * It removes snapshots once they have been uploaded * It aggressively prunes the database once entities are transferred to snapshots In addition to this, it has the following performance-related features: * It maximizes the workers allocated to snapshot processing to improve throughput * It can be started from scratch by downloading the latest snapshots from the remote location to seed processing ## snapshots command This is a standalone command for managing remote snapshots. It has the following sub-commands: * **cmp** - compare snapshots * **copy** - copy snapshots * **verify** - verify snapshots * **manifest** - manage the manifest file in the root of remote snapshot locations * **torrent** - manage snapshot torrent files --- .gitignore | 2 + Makefile | 2 + cl/antiquary/antiquary.go | 6 +- cmd/capcli/cli.go | 30 +- cmd/caplin/caplin1/run.go | 4 +- cmd/caplin/main.go | 5 +- cmd/devnet/main.go | 2 +- cmd/downloader/main.go | 11 +- cmd/hack/hack.go | 58 +- cmd/integration/commands/flags.go | 5 + cmd/integration/commands/reset_state.go | 9 +- cmd/integration/commands/root.go | 4 +- cmd/integration/commands/stages.go | 104 +- cmd/integration/commands/state_domains.go | 5 +- cmd/integration/commands/state_stages.go | 30 +- cmd/p2psim/main.go | 6 +- cmd/rpcdaemon/cli/config.go | 15 +- cmd/silkworm_api/snapshot_idx.go | 11 +- cmd/snapshots/README.md | 79 ++ cmd/snapshots/cmp/cmp.go | 788 ++++++++++++++ cmd/snapshots/copy/copy.go | 333 ++++++ cmd/snapshots/flags/flags.go | 11 + cmd/snapshots/main.go | 112 ++ cmd/snapshots/manifest/manifest.go | 365 +++++++ cmd/snapshots/sync/context.go | 38 + cmd/snapshots/sync/sync.go | 444 ++++++++ cmd/snapshots/sync/util.go | 32 + cmd/snapshots/torrents/torrents.go | 504 +++++++++ cmd/snapshots/verify/verify.go | 249 +++++ cmd/state/commands/check_change_sets.go | 7 +- cmd/state/commands/global_flags_vars.go | 5 + cmd/state/commands/opcode_tracer.go | 7 +- cmd/state/commands/state_root.go | 11 +- cmd/state/commands/verify_txlookup.go | 3 +- cmd/state/verify/verify_txlookup.go | 8 +- cmd/tooling/cli.go | 14 +- cmd/utils/flags.go | 2 +- erigon-lib/chain/snapcfg/util.go | 109 +- erigon-lib/common/dbg/experiments.go | 18 +- erigon-lib/common/dir/rw_dir.go | 11 + erigon-lib/compress/decompress.go | 4 + erigon-lib/downloader/downloader.go | 12 +- .../downloader/downloadercfg/downloadercfg.go | 9 +- erigon-lib/downloader/rclone.go | 783 ++++++++++++++ erigon-lib/downloader/rclone_test.go | 99 ++ erigon-lib/downloader/snaptype/files.go | 67 +- erigon-lib/downloader/util.go | 1 + erigon-lib/recsplit/index.go | 1 + 
erigon-lib/recsplit/recsplit.go | 6 + eth/backend.go | 46 +- eth/ethconfig/config.go | 9 + eth/stagedsync/default_stages.go | 213 +++- eth/stagedsync/exec3.go | 7 +- eth/stagedsync/stage_bodies.go | 22 +- eth/stagedsync/stage_bor_heimdall.go | 46 +- eth/stagedsync/stage_headers.go | 61 +- eth/stagedsync/stage_interhashes_test.go | 11 +- eth/stagedsync/stage_senders.go | 11 +- eth/stagedsync/stage_senders_test.go | 2 +- eth/stagedsync/stage_snapshots.go | 996 +++++++++++++++++- eth/stagedsync/sync.go | 52 +- eth/stagedsync/sync_test.go | 49 +- eth/stagedsync/test/harness.go | 5 +- p2p/sentry/simulator/sentry_simulator.go | 19 +- p2p/sentry/simulator/syncutil.go | 2 +- turbo/app/README.md | 71 ++ turbo/app/make_app.go | 39 +- turbo/app/snapshots_cmd.go | 100 +- turbo/cli/flags.go | 69 +- turbo/debug/flags.go | 2 +- turbo/execution/eth1/ethereum_execution.go | 27 +- turbo/execution/eth1/forkchoice.go | 2 +- turbo/logging/logging.go | 10 +- turbo/services/interfaces.go | 7 +- .../snapshotsync/freezeblocks/block_reader.go | 65 +- .../freezeblocks/block_reader_test.go | 64 +- .../freezeblocks/block_snapshots.go | 441 +++++--- .../freezeblocks/block_snapshots_test.go | 32 +- .../freezeblocks/bor_snapshots.go | 160 ++- .../freezeblocks/caplin_snapshots.go | 32 +- turbo/snapshotsync/freezeblocks/dump_test.go | 4 +- turbo/snapshotsync/snapshotsync.go | 6 +- turbo/stages/genesis_test.go | 2 +- turbo/stages/headerdownload/header_algos.go | 11 +- turbo/stages/mock/mock_sentry.go | 26 +- turbo/stages/stageloop.go | 104 +- 86 files changed, 6593 insertions(+), 673 deletions(-) create mode 100644 cmd/snapshots/README.md create mode 100644 cmd/snapshots/cmp/cmp.go create mode 100644 cmd/snapshots/copy/copy.go create mode 100644 cmd/snapshots/flags/flags.go create mode 100644 cmd/snapshots/main.go create mode 100644 cmd/snapshots/manifest/manifest.go create mode 100644 cmd/snapshots/sync/context.go create mode 100644 cmd/snapshots/sync/sync.go create mode 100644 cmd/snapshots/sync/util.go create mode 100644 cmd/snapshots/torrents/torrents.go create mode 100644 cmd/snapshots/verify/verify.go create mode 100644 erigon-lib/downloader/rclone.go create mode 100644 erigon-lib/downloader/rclone_test.go create mode 100644 turbo/app/README.md diff --git a/.gitignore b/.gitignore index 3ce4eeca701..123c1eb2b93 100644 --- a/.gitignore +++ b/.gitignore @@ -98,3 +98,5 @@ node_modules /config.toml /config.yaml /config.yml + +vendor \ No newline at end of file diff --git a/Makefile b/Makefile index 8890d6c1dc7..7ab3fb4248d 100644 --- a/Makefile +++ b/Makefile @@ -134,6 +134,8 @@ COMMANDS += sentinel COMMANDS += caplin COMMANDS += caplin-regression COMMANDS += tooling +COMMANDS += snapshots + diff --git a/cl/antiquary/antiquary.go b/cl/antiquary/antiquary.go index 1271bb2fec4..d3b5b36b066 100644 --- a/cl/antiquary/antiquary.go +++ b/cl/antiquary/antiquary.go @@ -209,7 +209,7 @@ func (a *Antiquary) Loop() error { if to-from < snaptype.Erigon2MergeLimit { continue } - if err := a.antiquate(from, to); err != nil { + if err := a.antiquate(a.sn.Version(), from, to); err != nil { return err } case <-a.ctx.Done(): @@ -218,12 +218,12 @@ func (a *Antiquary) Loop() error { } // Antiquate will antiquate a specific block range (aka. retire snapshots), this should be ran in the background. 
-func (a *Antiquary) antiquate(from, to uint64) error { +func (a *Antiquary) antiquate(version uint8, from, to uint64) error { if a.downloader == nil { return nil // Just skip if we don't have a downloader } log.Info("[Antiquary]: Antiquating", "from", from, "to", to) - if err := freezeblocks.DumpBeaconBlocks(a.ctx, a.mainDB, a.beaconDB, from, to, snaptype.Erigon2MergeLimit, a.dirs.Tmp, a.dirs.Snap, 1, log.LvlDebug, a.logger); err != nil { + if err := freezeblocks.DumpBeaconBlocks(a.ctx, a.mainDB, a.beaconDB, version, from, to, snaptype.Erigon2MergeLimit, a.dirs.Tmp, a.dirs.Snap, 1, log.LvlDebug, a.logger); err != nil { return err } diff --git a/cmd/capcli/cli.go b/cmd/capcli/cli.go index 53f9faceb7b..4a5be83f9ba 100644 --- a/cmd/capcli/cli.go +++ b/cmd/capcli/cli.go @@ -400,7 +400,9 @@ func (c *Chain) Run(ctx *Context) error { log.Info("Started chain download", "chain", c.Chain) dirs := datadir.New(c.Datadir) - csn := freezeblocks.NewCaplinSnapshots(ethconfig.BlocksFreezing{}, beaconConfig, dirs.Snap, log.Root()) + snapshotVersion := snapcfg.KnownCfg(c.Chain, 0).Version + + csn := freezeblocks.NewCaplinSnapshots(ethconfig.BlocksFreezing{}, beaconConfig, dirs.Snap, snapshotVersion, log.Root()) rawDB, _ := persistence.AferoRawBeaconBlockChainFromOsPath(beaconConfig, dirs.CaplinHistory) beaconDB, db, err := caplin1.OpenCaplinDatabase(ctx, db_config.DatabaseConfiguration{PruneDepth: math.MaxUint64}, beaconConfig, rawDB, dirs.CaplinIndexing, nil, false) @@ -592,7 +594,9 @@ func (c *DumpSnapshots) Run(ctx *Context) error { return }) - return freezeblocks.DumpBeaconBlocks(ctx, db, beaconDB, 0, to, snaptype.Erigon2MergeLimit, dirs.Tmp, dirs.Snap, estimate.CompressSnapshot.Workers(), log.LvlInfo, log.Root()) + snapshotVersion := snapcfg.KnownCfg(c.Chain, 0).Version + + return freezeblocks.DumpBeaconBlocks(ctx, db, beaconDB, snapshotVersion, 0, to, snaptype.Erigon2MergeLimit, dirs.Tmp, dirs.Snap, estimate.CompressSnapshot.Workers(), log.LvlInfo, log.Root()) } type CheckSnapshots struct { @@ -630,8 +634,9 @@ func (c *CheckSnapshots) Run(ctx *Context) error { } to = (to / snaptype.Erigon2MergeLimit) * snaptype.Erigon2MergeLimit + snapshotVersion := snapcfg.KnownCfg(c.Chain, 0).Version - csn := freezeblocks.NewCaplinSnapshots(ethconfig.BlocksFreezing{}, beaconConfig, dirs.Snap, log.Root()) + csn := freezeblocks.NewCaplinSnapshots(ethconfig.BlocksFreezing{}, beaconConfig, dirs.Snap, snapshotVersion, log.Root()) if err := csn.ReopenFolder(); err != nil { return err } @@ -712,7 +717,9 @@ func (c *LoopSnapshots) Run(ctx *Context) error { to = (to / snaptype.Erigon2MergeLimit) * snaptype.Erigon2MergeLimit - csn := freezeblocks.NewCaplinSnapshots(ethconfig.BlocksFreezing{}, beaconConfig, dirs.Snap, log.Root()) + snapshotVersion := snapcfg.KnownCfg(c.Chain, 0).Version + + csn := freezeblocks.NewCaplinSnapshots(ethconfig.BlocksFreezing{}, beaconConfig, dirs.Snap, snapshotVersion, log.Root()) if err := csn.ReopenFolder(); err != nil { return err } @@ -782,7 +789,14 @@ func (d *DownloadSnapshots) Run(ctx *Context) error { if err != nil { return fmt.Errorf("new server: %w", err) } - return snapshotsync.WaitForDownloader("CapCliDownloader", ctx, false, snapshotsync.OnlyCaplin, s, tx, freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.NewSnapCfg(false, false, false), dirs.Snap, log.Root()), freezeblocks.NewBorRoSnapshots(ethconfig.NewSnapCfg(false, false, false), dirs.Snap, log.Root())), params.ChainConfigByChainName(d.Chain), direct.NewDownloaderClient(bittorrentServer)) + + snapshotVersion := 
snapcfg.KnownCfg(d.Chain, 0).Version + + return snapshotsync.WaitForDownloader(ctx, "CapCliDownloader", false, snapshotsync.OnlyCaplin, s, tx, + freezeblocks.NewBlockReader( + freezeblocks.NewRoSnapshots(ethconfig.NewSnapCfg(false, false, false), dirs.Snap, snapshotVersion, log.Root()), + freezeblocks.NewBorRoSnapshots(ethconfig.NewSnapCfg(false, false, false), dirs.Snap, snapshotVersion, log.Root())), + params.ChainConfigByChainName(d.Chain), direct.NewDownloaderClient(bittorrentServer)) } type RetrieveHistoricalState struct { @@ -811,7 +825,9 @@ func (r *RetrieveHistoricalState) Run(ctx *Context) error { return err } defer tx.Rollback() - allSnapshots := freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{}, dirs.Snap, log.Root()) + snapshotVersion := snapcfg.KnownCfg(r.Chain, 0).Version + + allSnapshots := freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{}, dirs.Snap, snapshotVersion, log.Root()) if err := allSnapshots.ReopenFolder(); err != nil { return err } @@ -822,7 +838,7 @@ func (r *RetrieveHistoricalState) Run(ctx *Context) error { var bor *freezeblocks.BorRoSnapshots blockReader := freezeblocks.NewBlockReader(allSnapshots, bor) eth1Getter := getters.NewExecutionSnapshotReader(ctx, beaconConfig, blockReader, db) - csn := freezeblocks.NewCaplinSnapshots(ethconfig.BlocksFreezing{}, beaconConfig, dirs.Snap, log.Root()) + csn := freezeblocks.NewCaplinSnapshots(ethconfig.BlocksFreezing{}, beaconConfig, dirs.Snap, snapshotVersion, log.Root()) if err := csn.ReopenFolder(); err != nil { return err } diff --git a/cmd/caplin/caplin1/run.go b/cmd/caplin/caplin1/run.go index 9a2f7a63230..9f3cf050857 100644 --- a/cmd/caplin/caplin1/run.go +++ b/cmd/caplin/caplin1/run.go @@ -87,7 +87,7 @@ func OpenCaplinDatabase(ctx context.Context, func RunCaplinPhase1(ctx context.Context, sentinel sentinel.SentinelClient, engine execution_client.ExecutionEngine, beaconConfig *clparams.BeaconChainConfig, genesisConfig *clparams.GenesisConfig, state *state.CachingBeaconState, - caplinFreezer freezer.Freezer, dirs datadir.Dirs, cfg beacon_router_configuration.RouterConfiguration, eth1Getter snapshot_format.ExecutionBlockReaderByNumber, + caplinFreezer freezer.Freezer, dirs datadir.Dirs, snapshotVersion uint8, cfg beacon_router_configuration.RouterConfiguration, eth1Getter snapshot_format.ExecutionBlockReaderByNumber, snDownloader proto_downloader.DownloaderClient, backfilling bool, states bool, historyDB persistence.BeaconChainDatabase, indexDB kv.RwDB) error { rawDB, af := persistence.AferoRawBeaconBlockChainFromOsPath(beaconConfig, dirs.CaplinHistory) @@ -98,7 +98,7 @@ func RunCaplinPhase1(ctx context.Context, sentinel sentinel.SentinelClient, engi logger := log.New("app", "caplin") - csn := freezeblocks.NewCaplinSnapshots(ethconfig.BlocksFreezing{}, beaconConfig, dirs.Snap, logger) + csn := freezeblocks.NewCaplinSnapshots(ethconfig.BlocksFreezing{}, beaconConfig, dirs.Snap, snapshotVersion, logger) rcsn := freezeblocks.NewBeaconSnapshotReader(csn, eth1Getter, historyDB, beaconConfig) if caplinFreezer != nil { diff --git a/cmd/caplin/main.go b/cmd/caplin/main.go index 59a580d66c7..00457207ebb 100644 --- a/cmd/caplin/main.go +++ b/cmd/caplin/main.go @@ -16,6 +16,7 @@ import ( "fmt" "os" + "github.com/ledgerwatch/erigon-lib/chain/snapcfg" "github.com/ledgerwatch/erigon/cl/beacon/beacon_router_configuration" "github.com/ledgerwatch/erigon/cl/cltypes" "github.com/ledgerwatch/erigon/cl/fork" @@ -128,7 +129,9 @@ func runCaplinNode(cliCtx *cli.Context) error { return err } - return caplin1.RunCaplinPhase1(ctx, 
sentinel, executionEngine, cfg.BeaconCfg, cfg.GenesisCfg, state, caplinFreezer, cfg.Dirs, beacon_router_configuration.RouterConfiguration{ + snapshotVersion := snapcfg.KnownCfg(cliCtx.String(utils.ChainFlag.Name), 0).Version + + return caplin1.RunCaplinPhase1(ctx, sentinel, executionEngine, cfg.BeaconCfg, cfg.GenesisCfg, state, caplinFreezer, cfg.Dirs, snapshotVersion, beacon_router_configuration.RouterConfiguration{ Protocol: cfg.BeaconProtocol, Address: cfg.BeaconAddr, ReadTimeTimeout: cfg.BeaconApiReadTimeout, diff --git a/cmd/devnet/main.go b/cmd/devnet/main.go index 69f66e7a795..2c3a3371769 100644 --- a/cmd/devnet/main.go +++ b/cmd/devnet/main.go @@ -175,7 +175,7 @@ func setupLogger(ctx *cli.Context) (log.Logger, error) { return nil, err } - logger := logging.SetupLoggerCtx("devnet", ctx, false /* rootLogger */) + logger := logging.SetupLoggerCtx("devnet", ctx, log.LvlInfo, log.LvlInfo, false /* rootLogger */) // Make root logger fail log.Root().SetHandler(PanicHandler{}) diff --git a/cmd/downloader/main.go b/cmd/downloader/main.go index b036ca87cec..7db4324abb6 100644 --- a/cmd/downloader/main.go +++ b/cmd/downloader/main.go @@ -20,7 +20,7 @@ import ( "github.com/ledgerwatch/erigon-lib/common/datadir" "github.com/ledgerwatch/erigon-lib/common/dir" "github.com/ledgerwatch/erigon-lib/downloader" - downloadercfg2 "github.com/ledgerwatch/erigon-lib/downloader/downloadercfg" + "github.com/ledgerwatch/erigon-lib/downloader/downloadercfg" "github.com/ledgerwatch/erigon-lib/downloader/snaptype" proto_downloader "github.com/ledgerwatch/erigon-lib/gointerfaces/downloader" "github.com/ledgerwatch/erigon-lib/kv" @@ -164,7 +164,7 @@ func Downloader(ctx context.Context, logger log.Logger) error { if err := checkChainName(ctx, dirs, chain); err != nil { return err } - torrentLogLevel, _, err := downloadercfg2.Int2LogLevel(torrentVerbosity) + torrentLogLevel, _, err := downloadercfg.Int2LogLevel(torrentVerbosity) if err != nil { return err } @@ -186,7 +186,7 @@ func Downloader(ctx context.Context, logger log.Logger) error { if known, ok := snapcfg.KnownWebseeds[chain]; ok { webseedsList = append(webseedsList, known...) } - cfg, err := downloadercfg2.New(dirs, version, torrentLogLevel, downloadRate, uploadRate, torrentPort, torrentConnsPerFile, torrentDownloadSlots, staticPeers, webseedsList, chain) + cfg, err := downloadercfg.New(dirs, version, torrentLogLevel, downloadRate, uploadRate, torrentPort, torrentConnsPerFile, torrentDownloadSlots, staticPeers, webseedsList, chain) if err != nil { return err } @@ -201,6 +201,8 @@ func Downloader(ctx context.Context, logger log.Logger) error { } downloadernat.DoNat(natif, cfg.ClientConfig, logger) + cfg.AddTorrentsFromDisk = true // always true unless using uploader - which wants control of torrent files + d, err := downloader.New(ctx, cfg, dirs, logger, log.LvlInfo, seedbox) if err != nil { return err @@ -402,6 +404,7 @@ func doPrintTorrentHashes(ctx context.Context, logger log.Logger) error { if err != nil { return err } + for _, t := range torrents { // we don't release commitment history in this time. let's skip it here. if strings.HasPrefix(t.DisplayName, "history/v1-commitment") || strings.HasPrefix(t.DisplayName, "idx/v1-commitment") { @@ -494,7 +497,7 @@ func StartGrpc(snServer *downloader.GrpcServer, addr string, creds *credentials. 
// Add pre-configured func addPreConfiguredHashes(ctx context.Context, d *downloader.Downloader) error { - for _, it := range snapcfg.KnownCfg(chain).Preverified { + for _, it := range snapcfg.KnownCfg(chain, 0).Preverified { if err := d.AddMagnetLink(ctx, snaptype.Hex2InfoHash(it.Hash), it.Name); err != nil { return err } diff --git a/cmd/hack/hack.go b/cmd/hack/hack.go index 42aa5932bdf..b6585f90f42 100644 --- a/cmd/hack/hack.go +++ b/cmd/hack/hack.go @@ -8,7 +8,6 @@ import ( "encoding/json" "flag" "fmt" - "github.com/ledgerwatch/erigon-lib/kv/dbutils" "math/big" "net/http" _ "net/http/pprof" //nolint:gosec @@ -19,6 +18,8 @@ import ( "strings" "time" + "github.com/ledgerwatch/erigon-lib/kv/dbutils" + "github.com/RoaringBitmap/roaring/roaring64" "github.com/holiman/uint256" "github.com/ledgerwatch/log/v3" @@ -59,15 +60,16 @@ import ( ) var ( - action = flag.String("action", "", "action to execute") - cpuprofile = flag.String("cpuprofile", "", "write cpu profile `file`") - block = flag.Int("block", 1, "specifies a block number for operation") - blockTotal = flag.Int("blocktotal", 1, "specifies a total amount of blocks to process (will offset from head block if <= 0)") - account = flag.String("account", "0x", "specifies account to investigate") - name = flag.String("name", "", "name to add to the file names") - chaindata = flag.String("chaindata", "chaindata", "path to the chaindata database file") - bucket = flag.String("bucket", "", "bucket in the database") - hash = flag.String("hash", "0x00", "image for preimage or state root for testBlockHashes action") + action = flag.String("action", "", "action to execute") + cpuprofile = flag.String("cpuprofile", "", "write cpu profile `file`") + block = flag.Int("block", 1, "specifies a block number for operation") + blockTotal = flag.Int("blocktotal", 1, "specifies a total amount of blocks to process (will offset from head block if <= 0)") + account = flag.String("account", "0x", "specifies account to investigate") + name = flag.String("name", "", "name to add to the file names") + chaindata = flag.String("chaindata", "chaindata", "path to the chaindata database file") + bucket = flag.String("bucket", "", "bucket in the database") + hash = flag.String("hash", "0x00", "image for preimage or state root for testBlockHashes action") + shapshotVersion = flag.Uint("stapshots.version", 1, "specifies the snapshot file version") ) func dbSlice(chaindata string, bucket string, prefix []byte) { @@ -91,10 +93,10 @@ func dbSlice(chaindata string, bucket string, prefix []byte) { } // Searches 1000 blocks from the given one to try to find the one with the given state root hash -func testBlockHashes(chaindata string, block int, stateRoot libcommon.Hash) { +func testBlockHashes(chaindata string, snapshotVersion uint8, block int, stateRoot libcommon.Hash) { ethDb := mdbx.MustOpen(chaindata) defer ethDb.Close() - br, _ := blocksIO(ethDb) + br, _ := blocksIO(ethDb, snapshotVersion) tool.Check(ethDb.View(context.Background(), func(tx kv.Tx) error { blocksToSearch := 10000000 for i := uint64(block); i < uint64(block+blocksToSearch); i++ { @@ -130,7 +132,7 @@ func printCurrentBlockNumber(chaindata string) { }) } -func blocksIO(db kv.RoDB) (services.FullBlockReader, *blockio.BlockWriter) { +func blocksIO(db kv.RoDB, snapshotVersion uint8) (services.FullBlockReader, *blockio.BlockWriter) { var histV3 bool if err := db.View(context.Background(), func(tx kv.Tx) error { histV3, _ = kvcfg.HistoryV3.Enabled(tx) @@ -138,15 +140,15 @@ func blocksIO(db kv.RoDB) 
(services.FullBlockReader, *blockio.BlockWriter) { }); err != nil { panic(err) } - br := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", log.New()), nil /* BorSnapshots */) + br := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", snapshotVersion, log.New()), nil /* BorSnapshots */) bw := blockio.NewBlockWriter(histV3) return br, bw } -func printTxHashes(chaindata string, block uint64) error { +func printTxHashes(chaindata string, snapshotVersion uint8, block uint64) error { db := mdbx.MustOpen(chaindata) defer db.Close() - br, _ := blocksIO(db) + br, _ := blocksIO(db, snapshotVersion) if err := db.View(context.Background(), func(tx kv.Tx) error { for b := block; b < block+1; b++ { block, _ := br.BlockByNumber(context.Background(), tx, b) @@ -458,10 +460,10 @@ func getBlockTotal(tx kv.Tx, blockFrom uint64, blockTotalOrOffset int64) uint64 return 1 } -func extractHashes(chaindata string, blockStep uint64, blockTotalOrOffset int64, name string) error { +func extractHashes(chaindata string, snapshotVersion uint8, blockStep uint64, blockTotalOrOffset int64, name string) error { db := mdbx.MustOpen(chaindata) defer db.Close() - br, _ := blocksIO(db) + br, _ := blocksIO(db, snapshotVersion) f, err := os.Create(fmt.Sprintf("preverified_hashes_%s.go", name)) if err != nil { @@ -533,12 +535,12 @@ func extractHeaders(chaindata string, block uint64, blockTotalOrOffset int64) er return nil } -func extractBodies(datadir string) error { +func extractBodies(datadir string, snapshotVersion uint8) error { snaps := freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{ Enabled: true, KeepBlocks: true, Produce: false, - }, filepath.Join(datadir, "snapshots"), log.New()) + }, filepath.Join(datadir, "snapshots"), snapshotVersion, log.New()) snaps.ReopenFolder() /* method Iterate was removed, need re-implement @@ -577,7 +579,7 @@ func extractBodies(datadir string) error { */ db := mdbx.MustOpen(filepath.Join(datadir, "chaindata")) defer db.Close() - br, _ := blocksIO(db) + br, _ := blocksIO(db, snapshotVersion) tx, err := db.BeginRo(context.Background()) if err != nil { @@ -1023,7 +1025,7 @@ func scanReceipts3(chaindata string, block uint64) error { return nil } -func scanReceipts2(chaindata string) error { +func scanReceipts2(chaindata string, snapshotVersion uint8) error { f, err := os.Create("receipts.txt") if err != nil { return err @@ -1037,7 +1039,7 @@ func scanReceipts2(chaindata string) error { if err != nil { return err } - br, _ := blocksIO(dbdb) + br, _ := blocksIO(dbdb, snapshotVersion) defer tx.Rollback() blockNum, err := historyv2.AvailableFrom(tx) @@ -1386,7 +1388,7 @@ func main() { flow.TestGenCfg() case "testBlockHashes": - testBlockHashes(*chaindata, *block, libcommon.HexToHash(*hash)) + testBlockHashes(*chaindata, uint8(*shapshotVersion), *block, libcommon.HexToHash(*hash)) case "readAccount": if err := readAccount(*chaindata, libcommon.HexToAddress(*account)); err != nil { @@ -1424,7 +1426,7 @@ func main() { err = extractHeaders(*chaindata, uint64(*block), int64(*blockTotal)) case "extractHashes": - err = extractHashes(*chaindata, uint64(*block), int64(*blockTotal), *name) + err = extractHashes(*chaindata, uint8(*shapshotVersion), uint64(*block), int64(*blockTotal), *name) case "defrag": err = hackdb.Defrag() @@ -1433,13 +1435,13 @@ func main() { err = hackdb.TextInfo(*chaindata, &strings.Builder{}) case "extractBodies": - err = extractBodies(*chaindata) + err = extractBodies(*chaindata, 
uint8(*shapshotVersion)) case "repairCurrent": repairCurrent() case "printTxHashes": - printTxHashes(*chaindata, uint64(*block)) + printTxHashes(*chaindata, uint8(*shapshotVersion), uint64(*block)) case "snapSizes": err = snapSizes(*chaindata) @@ -1466,7 +1468,7 @@ func main() { err = scanTxs(*chaindata) case "scanReceipts2": - err = scanReceipts2(*chaindata) + err = scanReceipts2(*chaindata, uint8(*shapshotVersion)) case "scanReceipts3": err = scanReceipts3(*chaindata, uint64(*block)) diff --git a/cmd/integration/commands/flags.go b/cmd/integration/commands/flags.go index 22e583d0fff..ea80e124110 100644 --- a/cmd/integration/commands/flags.go +++ b/cmd/integration/commands/flags.go @@ -40,6 +40,7 @@ var ( _forceSetHistoryV3 bool workers, reconWorkers uint64 + snapshotVersion uint8 = 1 ) func must(err error) { @@ -170,3 +171,7 @@ func withCommitment(cmd *cobra.Command) { cmd.Flags().StringVar(&commitmentTrie, "commitment.trie", "hex", "hex - use Hex Patricia Hashed Trie for commitments, bin - use of binary patricia trie") cmd.Flags().IntVar(&commitmentFreq, "commitment.freq", 1000000, "how many blocks to skip between calculating commitment") } + +func withSnapshotVersion(cmd *cobra.Command) { + cmd.Flags().Uint8Var(&snapshotVersion, "stapshots.version", 1, "specifies the snapshot file version") +} diff --git a/cmd/integration/commands/reset_state.go b/cmd/integration/commands/reset_state.go index dec456eb865..852fd4a0d62 100644 --- a/cmd/integration/commands/reset_state.go +++ b/cmd/integration/commands/reset_state.go @@ -30,14 +30,14 @@ var cmdResetState = &cobra.Command{ Short: "Reset StateStages (5,6,7,8,9,10) and buckets", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return } ctx, _ := common.RootContext() defer db.Close() - sn, borSn, agg := allSnapshots(ctx, db, logger) + sn, borSn, agg := allSnapshots(ctx, db, snapshotVersion, logger) defer sn.Close() defer borSn.Close() defer agg.Close() @@ -73,7 +73,7 @@ var cmdClearBadBlocks = &cobra.Command{ RunE: func(cmd *cobra.Command, args []string) error { logger := debug.SetupCobra(cmd, "integration") ctx, _ := common.RootContext() - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return err @@ -90,10 +90,11 @@ func init() { withConfig(cmdResetState) withDataDir(cmdResetState) withChain(cmdResetState) - + withSnapshotVersion(cmdResetState) rootCmd.AddCommand(cmdResetState) withDataDir(cmdClearBadBlocks) + withSnapshotVersion(cmdClearBadBlocks) rootCmd.AddCommand(cmdClearBadBlocks) } diff --git a/cmd/integration/commands/root.go b/cmd/integration/commands/root.go index 95120c4f822..e90e38b2222 100644 --- a/cmd/integration/commands/root.go +++ b/cmd/integration/commands/root.go @@ -72,7 +72,7 @@ func dbCfg(label kv.Label, path string) kv2.MdbxOpts { return opts } -func openDB(opts kv2.MdbxOpts, applyMigrations bool, logger log.Logger) (kv.RwDB, error) { +func openDB(opts kv2.MdbxOpts, applyMigrations bool, snapshotVersion uint8, logger log.Logger) (kv.RwDB, error) { db := opts.MustOpen() if applyMigrations { migrator := migrations.NewMigrator(opts.GetLabel()) @@ -105,7 +105,7 @@ func openDB(opts kv2.MdbxOpts, applyMigrations bool, logger 
log.Logger) (kv.RwDB return nil, err } if h3 { - _, _, agg := allSnapshots(context.Background(), db, logger) + _, _, agg := allSnapshots(context.Background(), db, snapshotVersion, logger) tdb, err := temporal.New(db, agg, systemcontracts.SystemContractCodeLookup[chain]) if err != nil { return nil, err diff --git a/cmd/integration/commands/stages.go b/cmd/integration/commands/stages.go index c4dea1d3a55..9fee599d79f 100644 --- a/cmd/integration/commands/stages.go +++ b/cmd/integration/commands/stages.go @@ -65,7 +65,7 @@ var cmdStageSnapshots = &cobra.Command{ Short: "", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -86,7 +86,7 @@ var cmdStageHeaders = &cobra.Command{ Short: "", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -107,7 +107,7 @@ var cmdStageBorHeimdall = &cobra.Command{ Short: "", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -128,7 +128,7 @@ var cmdStageBodies = &cobra.Command{ Short: "", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -149,7 +149,7 @@ var cmdStageSenders = &cobra.Command{ Short: "", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -170,7 +170,7 @@ var cmdStageExec = &cobra.Command{ Short: "", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -193,7 +193,7 @@ var cmdStageTrie = &cobra.Command{ Short: "", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -214,7 +214,7 @@ var cmdStageHashState = &cobra.Command{ Short: "", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -235,7 +235,7 @@ var cmdStageHistory = &cobra.Command{ Short: "", Run: 
func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -256,7 +256,7 @@ var cmdLogIndex = &cobra.Command{ Short: "", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -277,7 +277,7 @@ var cmdCallTraces = &cobra.Command{ Short: "", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -298,7 +298,7 @@ var cmdStageTxLookup = &cobra.Command{ Short: "", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -318,7 +318,7 @@ var cmdPrintStages = &cobra.Command{ Short: "", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), false, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), false, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -339,7 +339,7 @@ var cmdPrintMigrations = &cobra.Command{ Short: "", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), false, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), false, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -359,7 +359,7 @@ var cmdRemoveMigration = &cobra.Command{ Short: "", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), false, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), false, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -381,7 +381,7 @@ var cmdRunMigrations = &cobra.Command{ logger := debug.SetupCobra(cmd, "integration") //non-accede and exclusive mode - to apply create new tables if need. 
cfg := dbCfg(kv.ChainDB, chaindata).Flags(func(u uint) uint { return u &^ mdbx.Accede }).Exclusive() - db, err := openDB(cfg, true, logger) + db, err := openDB(cfg, true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -396,7 +396,7 @@ var cmdSetPrune = &cobra.Command{ Short: "Override existing --prune flag value (if you know what you are doing)", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -416,13 +416,13 @@ var cmdSetSnap = &cobra.Command{ Short: "Override existing --snapshots flag value (if you know what you are doing)", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return } defer db.Close() - sn, borSn, agg := allSnapshots(cmd.Context(), db, logger) + sn, borSn, agg := allSnapshots(cmd.Context(), db, snapshotVersion, logger) defer sn.Close() defer borSn.Close() defer agg.Close() @@ -452,7 +452,7 @@ var cmdForceSetHistoryV3 = &cobra.Command{ Short: "Override existing --history.v3 flag value (if you know what you are doing)", Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -474,6 +474,7 @@ func init() { withDataDir(cmdPrintStages) withChain(cmdPrintStages) withHeimdall(cmdPrintStages) + withSnapshotVersion(cmdPrintStages) rootCmd.AddCommand(cmdPrintStages) withConfig(cmdStageSenders) @@ -484,11 +485,13 @@ func init() { withDataDir(cmdStageSenders) withChain(cmdStageSenders) withHeimdall(cmdStageSenders) + withSnapshotVersion(cmdStageSenders) rootCmd.AddCommand(cmdStageSenders) withConfig(cmdStageSnapshots) withDataDir(cmdStageSnapshots) withReset(cmdStageSnapshots) + withSnapshotVersion(cmdStageSnapshots) rootCmd.AddCommand(cmdStageSnapshots) withConfig(cmdStageHeaders) @@ -498,6 +501,7 @@ func init() { withReset(cmdStageHeaders) withChain(cmdStageHeaders) withHeimdall(cmdStageHeaders) + withSnapshotVersion(cmdStageHeaders) rootCmd.AddCommand(cmdStageHeaders) withConfig(cmdStageBorHeimdall) @@ -505,6 +509,7 @@ func init() { withReset(cmdStageBorHeimdall) withChain(cmdStageBorHeimdall) withHeimdall(cmdStageBorHeimdall) + withSnapshotVersion(cmdStageBorHeimdall) rootCmd.AddCommand(cmdStageBorHeimdall) withConfig(cmdStageBodies) @@ -512,6 +517,7 @@ func init() { withUnwind(cmdStageBodies) withChain(cmdStageBodies) withHeimdall(cmdStageBodies) + withSnapshotVersion(cmdStageBodies) rootCmd.AddCommand(cmdStageBodies) withConfig(cmdStageExec) @@ -526,6 +532,7 @@ func init() { withChain(cmdStageExec) withHeimdall(cmdStageExec) withWorkers(cmdStageExec) + withSnapshotVersion(cmdStageExec) rootCmd.AddCommand(cmdStageExec) withConfig(cmdStageHashState) @@ -537,6 +544,7 @@ func init() { withBatchSize(cmdStageHashState) withChain(cmdStageHashState) withHeimdall(cmdStageHashState) + withSnapshotVersion(cmdStageHashState) rootCmd.AddCommand(cmdStageHashState) withConfig(cmdStageTrie) @@ -548,6 +556,7 @@ 
func init() { withIntegrityChecks(cmdStageTrie) withChain(cmdStageTrie) withHeimdall(cmdStageTrie) + withSnapshotVersion(cmdStageTrie) rootCmd.AddCommand(cmdStageTrie) withConfig(cmdStageHistory) @@ -558,6 +567,7 @@ func init() { withPruneTo(cmdStageHistory) withChain(cmdStageHistory) withHeimdall(cmdStageHistory) + withSnapshotVersion(cmdStageHistory) rootCmd.AddCommand(cmdStageHistory) withConfig(cmdLogIndex) @@ -568,6 +578,7 @@ func init() { withPruneTo(cmdLogIndex) withChain(cmdLogIndex) withHeimdall(cmdLogIndex) + withSnapshotVersion(cmdLogIndex) rootCmd.AddCommand(cmdLogIndex) withConfig(cmdCallTraces) @@ -578,6 +589,7 @@ func init() { withPruneTo(cmdCallTraces) withChain(cmdCallTraces) withHeimdall(cmdCallTraces) + withSnapshotVersion(cmdCallTraces) rootCmd.AddCommand(cmdCallTraces) withConfig(cmdStageTxLookup) @@ -588,10 +600,12 @@ func init() { withPruneTo(cmdStageTxLookup) withChain(cmdStageTxLookup) withHeimdall(cmdStageTxLookup) + withSnapshotVersion(cmdStageTxLookup) rootCmd.AddCommand(cmdStageTxLookup) withConfig(cmdPrintMigrations) withDataDir(cmdPrintMigrations) + withSnapshotVersion(cmdPrintMigrations) rootCmd.AddCommand(cmdPrintMigrations) withConfig(cmdRemoveMigration) @@ -599,23 +613,27 @@ func init() { withMigration(cmdRemoveMigration) withChain(cmdRemoveMigration) withHeimdall(cmdRemoveMigration) + withSnapshotVersion(cmdRemoveMigration) rootCmd.AddCommand(cmdRemoveMigration) withConfig(cmdRunMigrations) withDataDir(cmdRunMigrations) withChain(cmdRunMigrations) withHeimdall(cmdRunMigrations) + withSnapshotVersion(cmdRunMigrations) rootCmd.AddCommand(cmdRunMigrations) withConfig(cmdSetSnap) withDataDir2(cmdSetSnap) withChain(cmdSetSnap) + withSnapshotVersion(cmdSetSnap) cmdSetSnap.Flags().Bool("snapshots", false, "") must(cmdSetSnap.MarkFlagRequired("snapshots")) rootCmd.AddCommand(cmdSetSnap) withConfig(cmdForceSetHistoryV3) withDataDir2(cmdForceSetHistoryV3) + withSnapshotVersion(cmdForceSetHistoryV3) cmdForceSetHistoryV3.Flags().BoolVar(&_forceSetHistoryV3, "history.v3", false, "") must(cmdForceSetHistoryV3.MarkFlagRequired("history.v3")) rootCmd.AddCommand(cmdForceSetHistoryV3) @@ -623,6 +641,7 @@ func init() { withConfig(cmdSetPrune) withDataDir(cmdSetPrune) withChain(cmdSetPrune) + withSnapshotVersion(cmdSetPrune) cmdSetPrune.Flags().StringVar(&pruneFlag, "prune", "hrtc", "") cmdSetPrune.Flags().Uint64Var(&pruneH, "prune.h.older", 0, "") cmdSetPrune.Flags().Uint64Var(&pruneR, "prune.r.older", 0, "") @@ -658,7 +677,7 @@ func stageHeaders(db kv.RwDB, ctx context.Context, logger log.Logger) error { return err } - sn, borSn, agg := allSnapshots(ctx, db, logger) + sn, borSn, agg := allSnapshots(ctx, db, snapshotVersion, logger) defer sn.Close() defer borSn.Close() defer agg.Close() @@ -756,7 +775,7 @@ func stageBorHeimdall(db kv.RwDB, ctx context.Context, logger log.Logger) error } func stageBodies(db kv.RwDB, ctx context.Context, logger log.Logger) error { - sn, borSn, agg := allSnapshots(ctx, db, logger) + sn, borSn, agg := allSnapshots(ctx, db, snapshotVersion, logger) defer sn.Close() defer borSn.Close() defer agg.Close() @@ -773,7 +792,7 @@ func stageBodies(db kv.RwDB, ctx context.Context, logger log.Logger) error { } u := sync.NewUnwindState(stages.Bodies, s.BlockNumber-unwind, s.BlockNumber) - cfg := stagedsync.StageBodiesCfg(db, nil, nil, nil, nil, 0, *chainConfig, br, historyV3, bw) + cfg := stagedsync.StageBodiesCfg(db, nil, nil, nil, nil, 0, *chainConfig, br, historyV3, bw, nil) if err := stagedsync.UnwindBodiesStage(u, tx, cfg, ctx); err != nil { return 
err } @@ -796,7 +815,7 @@ func stageBodies(db kv.RwDB, ctx context.Context, logger log.Logger) error { func stageSenders(db kv.RwDB, ctx context.Context, logger log.Logger) error { tmpdir := datadir.New(datadirCli).Tmp chainConfig := fromdb.ChainConfig(db) - sn, borSn, agg := allSnapshots(ctx, db, logger) + sn, borSn, agg := allSnapshots(ctx, db, snapshotVersion, logger) defer sn.Close() defer borSn.Close() defer agg.Close() @@ -863,7 +882,7 @@ func stageSenders(db kv.RwDB, ctx context.Context, logger log.Logger) error { return err } - cfg := stagedsync.StageSendersCfg(db, chainConfig, false, tmpdir, pm, br, nil) + cfg := stagedsync.StageSendersCfg(db, chainConfig, false, tmpdir, pm, br, nil, nil) if unwind > 0 { u := sync.NewUnwindState(stages.Senders, s.BlockNumber-unwind, s.BlockNumber) if err = stagedsync.UnwindSendersStage(u, tx, cfg, ctx); err != nil { @@ -894,7 +913,7 @@ func stageExec(db kv.RwDB, ctx context.Context, logger log.Logger) error { engine, vmConfig, sync, _, _ := newSync(ctx, db, nil /* miningConfig */, logger) must(sync.SetCurrentStage(stages.Execution)) - sn, borSn, agg := allSnapshots(ctx, db, logger) + sn, borSn, agg := allSnapshots(ctx, db, snapshotVersion, logger) defer sn.Close() defer borSn.Close() defer agg.Close() @@ -976,7 +995,7 @@ func stageExec(db kv.RwDB, ctx context.Context, logger log.Logger) error { func stageTrie(db kv.RwDB, ctx context.Context, logger log.Logger) error { dirs, pm, historyV3 := datadir.New(datadirCli), fromdb.PruneMode(db), kvcfg.HistoryV3.FromDB(db) - sn, borSn, agg := allSnapshots(ctx, db, logger) + sn, borSn, agg := allSnapshots(ctx, db, snapshotVersion, logger) defer sn.Close() defer borSn.Close() defer agg.Close() @@ -1034,7 +1053,7 @@ func stageTrie(db kv.RwDB, ctx context.Context, logger log.Logger) error { func stageHashState(db kv.RwDB, ctx context.Context, logger log.Logger) error { dirs, pm, historyV3 := datadir.New(datadirCli), fromdb.PruneMode(db), kvcfg.HistoryV3.FromDB(db) - sn, borSn, agg := allSnapshots(ctx, db, logger) + sn, borSn, agg := allSnapshots(ctx, db, snapshotVersion, logger) defer sn.Close() defer borSn.Close() defer agg.Close() @@ -1212,7 +1231,7 @@ func stageHistory(db kv.RwDB, ctx context.Context, logger log.Logger) error { if historyV3 { return fmt.Errorf("this stage is disable in --history.v3=true") } - sn, borSn, agg := allSnapshots(ctx, db, logger) + sn, borSn, agg := allSnapshots(ctx, db, snapshotVersion, logger) defer sn.Close() defer borSn.Close() defer agg.Close() @@ -1288,7 +1307,7 @@ func stageTxLookup(db kv.RwDB, ctx context.Context, logger log.Logger) error { _, _, sync, _, _ := newSync(ctx, db, nil /* miningConfig */, logger) chainConfig := fromdb.ChainConfig(db) must(sync.SetCurrentStage(stages.TxLookup)) - sn, borSn, agg := allSnapshots(ctx, db, logger) + sn, borSn, agg := allSnapshots(ctx, db, snapshotVersion, logger) defer sn.Close() defer borSn.Close() defer agg.Close() @@ -1338,7 +1357,7 @@ func stageTxLookup(db kv.RwDB, ctx context.Context, logger log.Logger) error { } func printAllStages(db kv.RoDB, ctx context.Context, logger log.Logger) error { - sn, borSn, agg := allSnapshots(ctx, db, logger) + sn, borSn, agg := allSnapshots(ctx, db, snapshotVersion, logger) defer sn.Close() defer borSn.Close() defer agg.Close() @@ -1374,7 +1393,7 @@ var _allSnapshotsSingleton *freezeblocks.RoSnapshots var _allBorSnapshotsSingleton *freezeblocks.BorRoSnapshots var _aggSingleton *libstate.AggregatorV3 -func allSnapshots(ctx context.Context, db kv.RoDB, logger log.Logger) (*freezeblocks.RoSnapshots, 
*freezeblocks.BorRoSnapshots, *libstate.AggregatorV3) { +func allSnapshots(ctx context.Context, db kv.RoDB, version uint8, logger log.Logger) (*freezeblocks.RoSnapshots, *freezeblocks.BorRoSnapshots, *libstate.AggregatorV3) { openSnapshotOnce.Do(func() { var useSnapshots bool _ = db.View(context.Background(), func(tx kv.Tx) error { @@ -1385,8 +1404,8 @@ func allSnapshots(ctx context.Context, db kv.RoDB, logger log.Logger) (*freezebl dir.MustExist(dirs.SnapHistory) snapCfg := ethconfig.NewSnapCfg(useSnapshots, true, true) - _allSnapshotsSingleton = freezeblocks.NewRoSnapshots(snapCfg, dirs.Snap, logger) - _allBorSnapshotsSingleton = freezeblocks.NewBorRoSnapshots(snapCfg, dirs.Snap, logger) + _allSnapshotsSingleton = freezeblocks.NewRoSnapshots(snapCfg, dirs.Snap, version, logger) + _allBorSnapshotsSingleton = freezeblocks.NewBorRoSnapshots(snapCfg, dirs.Snap, snapshotVersion, logger) var err error _aggSingleton, err = libstate.NewAggregatorV3(ctx, dirs.SnapHistory, dirs.Tmp, ethconfig.HistoryV3AggregationStep, db, logger) @@ -1402,11 +1421,11 @@ func allSnapshots(ctx context.Context, db kv.RoDB, logger log.Logger) (*freezebl if err := _allSnapshotsSingleton.ReopenFolder(); err != nil { panic(err) } - _allSnapshotsSingleton.LogStat() + _allSnapshotsSingleton.LogStat("all") if err := _allBorSnapshotsSingleton.ReopenFolder(); err != nil { panic(err) } - _allBorSnapshotsSingleton.LogStat() + _allBorSnapshotsSingleton.LogStat("all") db.View(context.Background(), func(tx kv.Tx) error { _aggSingleton.LogStats(tx, func(endTxNumMinimax uint64) uint64 { _, histBlockNumProgress, _ := rawdbv3.TxNums.FindBlockNum(tx, endTxNumMinimax) @@ -1425,7 +1444,7 @@ var _blockWriterSingleton *blockio.BlockWriter func blocksIO(db kv.RoDB, logger log.Logger) (services.FullBlockReader, *blockio.BlockWriter) { openBlockReaderOnce.Do(func() { - sn, borSn, _ := allSnapshots(context.Background(), db, logger) + sn, borSn, _ := allSnapshots(context.Background(), db, snapshotVersion, logger) histV3 := kvcfg.HistoryV3.FromDB(db) _blockReaderSingleton = freezeblocks.NewBlockReader(sn, borSn) _blockWriterSingleton = blockio.NewBlockWriter(histV3) @@ -1447,7 +1466,7 @@ func allDomains(ctx context.Context, db kv.RoDB, stepSize uint64, mode libstate. dir.MustExist(dirs.SnapHistory) snapCfg := ethconfig.NewSnapCfg(useSnapshots, true, true) - _allSnapshotsSingleton = freezeblocks.NewRoSnapshots(snapCfg, dirs.Snap, logger) + _allSnapshotsSingleton = freezeblocks.NewRoSnapshots(snapCfg, dirs.Snap, snapshotVersion, logger) var err error _aggDomainSingleton, err = libstate.NewAggregator(filepath.Join(dirs.DataDir, "state"), dirs.Tmp, stepSize, mode, trie, logger) @@ -1462,7 +1481,7 @@ func allDomains(ctx context.Context, db kv.RoDB, stepSize uint64, mode libstate. 
if err := _allSnapshotsSingleton.ReopenFolder(); err != nil { panic(err) } - _allSnapshotsSingleton.LogStat() + _allSnapshotsSingleton.LogStat("all:singleton") //db.View(context.Background(), func(tx kv.Tx) error { // _aggSingleton.LogStats(tx, func(endTxNumMinimax uint64) uint64 { // _, histBlockNumProgress, _ := rawdbv3.TxNums.FindBlockNum(tx, endTxNumMinimax) @@ -1539,7 +1558,7 @@ func newSync(ctx context.Context, db kv.RwDB, miningConfig *params.MiningConfig, cfg.Miner = *miningConfig } cfg.Dirs = datadir.New(datadirCli) - allSn, _, agg := allSnapshots(ctx, db, logger) + allSn, _, agg := allSnapshots(ctx, db, snapshotVersion, logger) cfg.Snapshot = allSn.Cfg() blockReader, blockWriter := blocksIO(db, logger) @@ -1583,7 +1602,7 @@ func newSync(ctx context.Context, db kv.RwDB, miningConfig *params.MiningConfig, } stages := stages2.NewDefaultStages(context.Background(), db, snapDb, p2p.Config{}, &cfg, sentryControlServer, notifications, nil, blockReader, blockRetire, agg, nil, nil, heimdallClient, recents, signatures, logger) - sync := stagedsync.New(stages, stagedsync.DefaultUnwindOrder, stagedsync.DefaultPruneOrder, logger) + sync := stagedsync.New(cfg.Sync, stages, stagedsync.DefaultUnwindOrder, stagedsync.DefaultPruneOrder, logger) miner := stagedsync.NewMiningState(&cfg.Miner) miningCancel := make(chan struct{}) @@ -1593,9 +1612,10 @@ func newSync(ctx context.Context, db kv.RwDB, miningConfig *params.MiningConfig, }() miningSync := stagedsync.New( + cfg.Sync, stagedsync.MiningStages(ctx, stagedsync.StageMiningCreateBlockCfg(db, miner, *chainConfig, engine, nil, nil, dirs.Tmp, blockReader), - stagedsync.StageBorHeimdallCfg(db, snapDb, miner, *chainConfig, heimdallClient, blockReader, nil, nil, recents, signatures), + stagedsync.StageBorHeimdallCfg(db, snapDb, miner, *chainConfig, heimdallClient, blockReader, nil, nil, nil, recents, signatures), stagedsync.StageMiningExecCfg(db, miner, events, *chainConfig, engine, &vm.Config{}, dirs.Tmp, nil, 0, nil, nil, blockReader), stagedsync.StageHashStateCfg(db, dirs, historyV3), stagedsync.StageTrieCfg(db, false, true, false, dirs.Tmp, blockReader, nil, historyV3, agg), diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index fabde89f2f4..cacbb6238de 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -5,12 +5,13 @@ import ( "encoding/hex" "errors" "fmt" - "github.com/ledgerwatch/erigon-lib/metrics" "path/filepath" "runtime" "strings" "time" + "github.com/ledgerwatch/erigon-lib/metrics" + "github.com/holiman/uint256" "github.com/ledgerwatch/log/v3" "github.com/spf13/cobra" @@ -92,7 +93,7 @@ var readDomains = &cobra.Command{ } dirs := datadir.New(datadirCli) - chainDb, err := openDB(dbCfg(kv.ChainDB, dirs.Chaindata), true, logger) + chainDb, err := openDB(dbCfg(kv.ChainDB, dirs.Chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return diff --git a/cmd/integration/commands/state_stages.go b/cmd/integration/commands/state_stages.go index 3401cf669de..63733b5fc2f 100644 --- a/cmd/integration/commands/state_stages.go +++ b/cmd/integration/commands/state_stages.go @@ -6,11 +6,12 @@ import ( "encoding/json" "errors" "fmt" - "github.com/ledgerwatch/erigon-lib/kv/dbutils" "os" "sort" "time" + "github.com/ledgerwatch/erigon-lib/kv/dbutils" + "github.com/c2h5oh/datasize" chain2 "github.com/ledgerwatch/erigon-lib/chain" common2 "github.com/ledgerwatch/erigon-lib/common" @@ -63,7 +64,7 @@ Examples: 
erigoncli.ApplyFlagsForEthConfigCobra(cmd.Flags(), ethConfig) miningConfig := params.MiningConfig{} utils.SetupMinerCobra(cmd, &miningConfig) - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -93,7 +94,7 @@ var loopIhCmd = &cobra.Command{ Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") ctx, _ := common2.RootContext() - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -117,7 +118,7 @@ var loopExecCmd = &cobra.Command{ Run: func(cmd *cobra.Command, args []string) { logger := debug.SetupCobra(cmd, "integration") ctx, _ := common2.RootContext() - db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, logger) + db, err := openDB(dbCfg(kv.ChainDB, chaindata), true, snapshotVersion, logger) if err != nil { logger.Error("Opening DB", "error", err) return @@ -147,6 +148,7 @@ func init() { withChain(stateStages) withHeimdall(stateStages) withWorkers(stateStages) + withSnapshotVersion(stateStages) rootCmd.AddCommand(stateStages) withConfig(loopIhCmd) @@ -155,6 +157,7 @@ func init() { withUnwind(loopIhCmd) withChain(loopIhCmd) withHeimdall(loopIhCmd) + withSnapshotVersion(loopIhCmd) rootCmd.AddCommand(loopIhCmd) withConfig(loopExecCmd) @@ -164,6 +167,7 @@ func init() { withChain(loopExecCmd) withHeimdall(loopExecCmd) withWorkers(loopExecCmd) + withSnapshotVersion(loopExecCmd) rootCmd.AddCommand(loopExecCmd) } @@ -173,7 +177,7 @@ func syncBySmallSteps(db kv.RwDB, miningConfig params.MiningConfig, ctx context. return err } - sn, borSn, agg := allSnapshots(ctx, db, logger1) + sn, borSn, agg := allSnapshots(ctx, db, snapshotVersion, logger1) defer sn.Close() defer borSn.Close() defer agg.Close() @@ -313,7 +317,7 @@ func syncBySmallSteps(db kv.RwDB, miningConfig params.MiningConfig, ctx context. stateStages.MockExecFunc(stages.Execution, execUntilFunc(execToBlock)) _ = stateStages.SetCurrentStage(stages.Execution) - if err := stateStages.Run(db, tx, false /* firstCycle */); err != nil { + if _, err := stateStages.Run(db, tx, false /* firstCycle */); err != nil { return err } @@ -371,7 +375,7 @@ func syncBySmallSteps(db kv.RwDB, miningConfig params.MiningConfig, ctx context. 
//}) _ = miningStages.SetCurrentStage(stages.MiningCreateBlock) - if err := miningStages.Run(db, tx, false /* firstCycle */); err != nil { + if _, err := miningStages.Run(db, tx, false /* firstCycle */); err != nil { return err } tx.Rollback() @@ -450,7 +454,7 @@ func checkMinedBlock(b1, b2 *types.Block, chainConfig *chain2.Config) { } func loopIh(db kv.RwDB, ctx context.Context, unwind uint64, logger log.Logger) error { - sn, borSn, agg := allSnapshots(ctx, db, logger) + sn, borSn, agg := allSnapshots(ctx, db, snapshotVersion, logger) defer sn.Close() defer borSn.Close() defer agg.Close() @@ -464,7 +468,7 @@ func loopIh(db kv.RwDB, ctx context.Context, unwind uint64, logger log.Logger) e } defer tx.Rollback() sync.DisableStages(stages.Snapshots, stages.Headers, stages.BlockHashes, stages.Bodies, stages.Senders, stages.Execution, stages.AccountHistoryIndex, stages.StorageHistoryIndex, stages.TxLookup, stages.Finish) - if err = sync.Run(db, tx, false /* firstCycle */); err != nil { + if _, err = sync.Run(db, tx, false /* firstCycle */); err != nil { return err } execStage := stage(sync, tx, nil, stages.HashState) @@ -488,7 +492,7 @@ func loopIh(db kv.RwDB, ctx context.Context, unwind uint64, logger log.Logger) e sync.DisableStages(stages.IntermediateHashes) _ = sync.SetCurrentStage(stages.HashState) - if err = sync.Run(db, tx, false /* firstCycle */); err != nil { + if _, err = sync.Run(db, tx, false /* firstCycle */); err != nil { return err } must(tx.Commit()) @@ -508,7 +512,7 @@ func loopIh(db kv.RwDB, ctx context.Context, unwind uint64, logger log.Logger) e _ = sync.SetCurrentStage(stages.IntermediateHashes) t := time.Now() - if err = sync.Run(db, tx, false /* firstCycle */); err != nil { + if _, err = sync.Run(db, tx, false /* firstCycle */); err != nil { return err } logger.Warn("loop", "time", time.Since(t).String()) @@ -524,7 +528,7 @@ func loopIh(db kv.RwDB, ctx context.Context, unwind uint64, logger log.Logger) e func loopExec(db kv.RwDB, ctx context.Context, unwind uint64, logger log.Logger) error { chainConfig := fromdb.ChainConfig(db) dirs, pm := datadir.New(datadirCli), fromdb.PruneMode(db) - sn, borSn, agg := allSnapshots(ctx, db, logger) + sn, borSn, agg := allSnapshots(ctx, db, snapshotVersion, logger) defer sn.Close() defer borSn.Close() defer agg.Close() @@ -579,7 +583,7 @@ func loopExec(db kv.RwDB, ctx context.Context, unwind uint64, logger log.Logger) _ = sync.SetCurrentStage(stages.Execution) t := time.Now() - if err = sync.Run(db, tx, initialCycle); err != nil { + if _, err = sync.Run(db, tx, initialCycle); err != nil { return err } logger.Info("[Integration] ", "loop time", time.Since(t)) diff --git a/cmd/p2psim/main.go b/cmd/p2psim/main.go index 3f567847bbd..e7202866500 100644 --- a/cmd/p2psim/main.go +++ b/cmd/p2psim/main.go @@ -39,12 +39,14 @@ import ( "context" "encoding/json" "fmt" - "github.com/ledgerwatch/erigon-lib/common" "io" "os" "strings" "text/tabwriter" + "github.com/ledgerwatch/erigon-lib/common" + "github.com/ledgerwatch/log/v3" + "github.com/ledgerwatch/erigon/turbo/logging" "github.com/urfave/cli/v2" @@ -70,7 +72,7 @@ func main() { }, } app.Before = func(ctx *cli.Context) error { - logger := logging.SetupLoggerCtx("p2psim", ctx, false /* rootLogger */) + logger := logging.SetupLoggerCtx("p2psim", ctx, log.LvlInfo, log.LvlInfo, false /* rootLogger */) client = simulations.NewClient(ctx.String("api"), logger) return nil } diff --git a/cmd/rpcdaemon/cli/config.go b/cmd/rpcdaemon/cli/config.go index 6daa354cf20..18127a88e02 100644 --- 
a/cmd/rpcdaemon/cli/config.go +++ b/cmd/rpcdaemon/cli/config.go @@ -14,6 +14,7 @@ import ( "time" "github.com/ledgerwatch/erigon-lib/chain" + "github.com/ledgerwatch/erigon-lib/chain/snapcfg" libcommon "github.com/ledgerwatch/erigon-lib/common" "github.com/ledgerwatch/erigon-lib/common/datadir" "github.com/ledgerwatch/erigon-lib/common/dir" @@ -372,15 +373,17 @@ func RemoteServices(ctx context.Context, cfg *httpcfg.HttpCfg, logger log.Logger logger.Info("Use --snapshots=false") } + snapshotVersion := snapcfg.KnownCfg(cc.ChainName, 0).Version + // Configure sapshots - allSnapshots = freezeblocks.NewRoSnapshots(cfg.Snap, cfg.Dirs.Snap, logger) - allBorSnapshots = freezeblocks.NewBorRoSnapshots(cfg.Snap, cfg.Dirs.Snap, logger) + allSnapshots = freezeblocks.NewRoSnapshots(cfg.Snap, cfg.Dirs.Snap, snapshotVersion, logger) + allBorSnapshots = freezeblocks.NewBorRoSnapshots(cfg.Snap, cfg.Dirs.Snap, snapshotVersion, logger) // To povide good UX - immediatly can read snapshots after RPCDaemon start, even if Erigon is down // Erigon does store list of snapshots in db: means RPCDaemon can read this list now, but read by `remoteKvClient.Snapshots` after establish grpc connection allSnapshots.OptimisticReopenWithDB(db) allBorSnapshots.OptimisticalyReopenWithDB(db) - allSnapshots.LogStat() - allBorSnapshots.LogStat() + allSnapshots.LogStat("remote") + allBorSnapshots.LogStat("remote") if agg, err = libstate.NewAggregatorV3(ctx, cfg.Dirs.SnapHistory, cfg.Dirs.Tmp, ethconfig.HistoryV3AggregationStep, db, logger); err != nil { return nil, nil, nil, nil, nil, nil, nil, ff, nil, fmt.Errorf("create aggregator: %w", err) @@ -404,12 +407,12 @@ func RemoteServices(ctx context.Context, cfg *httpcfg.HttpCfg, logger log.Logger if err := allSnapshots.ReopenList(reply.BlocksFiles, true); err != nil { logger.Error("[snapshots] reopen", "err", err) } else { - allSnapshots.LogStat() + allSnapshots.LogStat("reopen") } if err := allBorSnapshots.ReopenList(reply.BlocksFiles, true); err != nil { logger.Error("[bor snapshots] reopen", "err", err) } else { - allSnapshots.LogStat() + allBorSnapshots.LogStat("reopen") } _ = reply.HistoryFiles diff --git a/cmd/silkworm_api/snapshot_idx.go b/cmd/silkworm_api/snapshot_idx.go index 8353255451b..8f728ddf06f 100644 --- a/cmd/silkworm_api/snapshot_idx.go +++ b/cmd/silkworm_api/snapshot_idx.go @@ -6,6 +6,7 @@ import ( "path/filepath" "time" + "github.com/ledgerwatch/erigon-lib/chain/snapcfg" "github.com/ledgerwatch/erigon-lib/common/background" "github.com/ledgerwatch/erigon-lib/common/datadir" "github.com/ledgerwatch/erigon-lib/downloader/snaptype" @@ -37,7 +38,7 @@ func main() { }, }, Action: func(cCtx *cli.Context) error { - return buildIndex(cCtx, cCtx.String("datadir"), cCtx.StringSlice("snapshot_path")) + return buildIndex(cCtx, cCtx.String("datadir"), cCtx.StringSlice("snapshot_path"), 0) }, } @@ -55,7 +56,7 @@ func FindIf(segments []snaptype.FileInfo, predicate func(snaptype.FileInfo) bool return snaptype.FileInfo{}, false // Return zero value and false if not found } -func buildIndex(cliCtx *cli.Context, dataDir string, snapshotPaths []string) error { +func buildIndex(cliCtx *cli.Context, dataDir string, snapshotPaths []string, minBlock uint64) error { logger, _, err := debug.Setup(cliCtx, true /* rootLogger */) if err != nil { return err @@ -75,7 +76,7 @@ func buildIndex(cliCtx *cli.Context, dataDir string, snapshotPaths []string) err chainConfig := fromdb.ChainConfig(chainDB) - segments, _, err := freezeblocks.Segments(dirs.Snap) + segments, _, err := 
freezeblocks.Segments(dirs.Snap, snapcfg.KnownCfg(chainConfig.ChainName, 0).Version, minBlock) if err != nil { return err } @@ -97,7 +98,7 @@ func buildIndex(cliCtx *cli.Context, dataDir string, snapshotPaths []string) err jobProgress := &background.Progress{} ps.Add(jobProgress) defer ps.Delete(jobProgress) - return freezeblocks.HeadersIdx(ctx, segment.Path, segment.From, dirs.Tmp, jobProgress, logLevel, logger) + return freezeblocks.HeadersIdx(ctx, segment.Path, segment.Version, segment.From, dirs.Tmp, jobProgress, logLevel, logger) }) case snaptype.Bodies: g.Go(func() error { @@ -112,7 +113,7 @@ func buildIndex(cliCtx *cli.Context, dataDir string, snapshotPaths []string) err ps.Add(jobProgress) defer ps.Delete(jobProgress) dir, _ := filepath.Split(segment.Path) - return freezeblocks.TransactionsIdx(ctx, chainConfig, segment.From, segment.To, dir, dirs.Tmp, jobProgress, logLevel, logger) + return freezeblocks.TransactionsIdx(ctx, chainConfig, segment.Version, segment.From, segment.To, dir, dirs.Tmp, jobProgress, logLevel, logger) }) } } diff --git a/cmd/snapshots/README.md b/cmd/snapshots/README.md new file mode 100644 index 00000000000..110f2d20ab4 --- /dev/null +++ b/cmd/snapshots/README.md @@ -0,0 +1,79 @@ +# Snapshots - tool for managing remote snapshots + +In the root of the `Erigon` project, use this command to build the commands: + +```shell +make snapshots +``` + +It can then be run using the following command: + +```shell +./build/bin/snapshots sub-command options... +``` + +Snapshots supports the following sub-commands: + +## cmp - compare snapshots + +This command takes the following form: + +```shell + snapshots cmp <location 1> <location 2> +``` + +This will cause the .seg files from each location to be copied to the local machine, indexed and then have their RLP contents compared. + +Optionally a `<first block>` and optionally a `<last block>` may be specified to limit the scope of the operation. + +It is also possible to set the `--types` flag to limit the type of segment file being downloaded and compared. The currently supported types are `header` and `body`. + +## copy - copy snapshots + +This command can be used to copy segment files from one location to another.
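+For example, a hypothetical invocation that copies header and body segments for a block range from an rclone remote to a local directory is sketched below; the remote name `r2` (assumed to be configured in your rclone config file), the bucket name, the local path and the block numbers are all placeholders, and the `--types` values follow that flag's own usage hint:
+
+```shell
+ ./build/bin/snapshots copy --types headers,bodies r2:erigon-v2-snapshots-mainnet /data/erigon/snapshots 500000 1000000
+```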
+ +More generally, the command takes the following form: + +```shell + snapshots copy <source> <destination> +``` + +Optionally a `<first block>` and optionally a `<last block>` may be specified to limit the scope of the operation. + +## verify - verify snapshots + +-- TBD + +## manifest - manage the manifest file in the root of remote snapshot locations + +The `manifest` command supports the following actions: + +| Action | Description | +|--------|-------------| +| list | list manifest from storage location | +| update | update the manifest to match the files available at its storage location | +| verify | verify that manifest matches the files available at its storage location | + +All actions take a `<location>` argument which specifies the remote location containing the manifest. + +Optionally a `<first block>` and optionally a `<last block>` may be specified to limit the scope of the operation. + +## torrent - manage snapshot torrent files + +The `torrent` command supports the following actions: + +| Action | Description | +|--------|-------------| +| list | list torrents available at the specified storage location | +| hashes | list the hashes (in toml format) at the specified storage location | +| update | update (re-create) the torrents for the contents available at its storage location | +| verify | verify that manifest contents are available at its storage location | + +All actions take a `<location>` argument which specifies the remote location containing the torrents. + +Optionally a `<first block>` and optionally a `<last block>` may be specified to limit the scope of the operation. + + + + + diff --git a/cmd/snapshots/cmp/cmp.go b/cmd/snapshots/cmp/cmp.go new file mode 100644 index 00000000000..2ba6e0fde47 --- /dev/null +++ b/cmd/snapshots/cmp/cmp.go @@ -0,0 +1,788 @@ +package cmp + +import ( + "bytes" + "context" + "fmt" + "io/fs" + "os" + "path/filepath" + "strconv" + "sync/atomic" + "time" + + "github.com/c2h5oh/datasize" + "github.com/ledgerwatch/erigon-lib/chain" + "github.com/ledgerwatch/erigon-lib/common" + "github.com/ledgerwatch/erigon-lib/downloader" + "github.com/ledgerwatch/erigon-lib/downloader/snaptype" + "github.com/ledgerwatch/erigon/cmd/snapshots/flags" + "github.com/ledgerwatch/erigon/cmd/snapshots/sync" + "github.com/ledgerwatch/erigon/cmd/utils" + "github.com/ledgerwatch/erigon/core/types" + "github.com/ledgerwatch/erigon/eth/ethconfig" + "github.com/ledgerwatch/erigon/params" + "github.com/ledgerwatch/erigon/turbo/logging" + "github.com/ledgerwatch/erigon/turbo/snapshotsync/freezeblocks" + "github.com/ledgerwatch/log/v3" + "github.com/urfave/cli/v2" + "golang.org/x/sync/errgroup" +) + +var Command = cli.Command{ + Action: cmp, + Name: "cmp", + Usage: "Compare snapshot segments", + ArgsUsage: " ", + Flags: []cli.Flag{ + &flags.SegTypes, + &utils.DataDirFlag, + &logging.LogVerbosityFlag, + &logging.LogConsoleVerbosityFlag, + &logging.LogDirVerbosityFlag, + &utils.WebSeedsFlag, + &utils.NATFlag, + &utils.DisableIPV6, + &utils.DisableIPV4, + &utils.TorrentDownloadRateFlag, + &utils.TorrentUploadRateFlag, + &utils.TorrentVerbosityFlag, + &utils.TorrentPortFlag, + &utils.TorrentMaxPeersFlag, + &utils.TorrentConnsPerFileFlag, + }, + Description: ``, +} + +func cmp(cliCtx *cli.Context) error { + + logger := sync.Logger(cliCtx.Context) + + var loc1, loc2 *sync.Locator + + var rcCli *downloader.RCloneClient + var torrentCli *sync.TorrentClient + + dataDir := cliCtx.String(utils.DataDirFlag.Name) + var tempDir string + + if len(dataDir) == 0 { + dataDir, err := os.MkdirTemp("", "snapshot-cpy-") + if err != nil { + return err + } + tempDir = dataDir + defer os.RemoveAll(dataDir) + } else { +
tempDir = filepath.Join(dataDir, "temp") + + if err := os.MkdirAll(tempDir, 0755); err != nil { + return err + } + } + + cliCtx.Context = sync.WithTempDir(cliCtx.Context, tempDir) + + var err error + + checkRemote := func(src string) error { + if rcCli == nil { + rcCli, err = downloader.NewRCloneClient(logger) + + if err != nil { + return err + } + } + + return sync.CheckRemote(rcCli, src) + } + + var chain string + + pos := 0 + + if cliCtx.Args().Len() > pos { + val := cliCtx.Args().Get(pos) + + if loc1, err = sync.ParseLocator(val); err != nil { + return err + } + + switch loc1.LType { + case sync.RemoteFs: + if err = checkRemote(loc1.Src); err != nil { + return err + } + + chain = loc1.Chain + } + } + + pos++ + + if cliCtx.Args().Len() > pos { + val := cliCtx.Args().Get(pos) + + if loc2, err = sync.ParseLocator(val); err != nil { + return err + } + + switch loc2.LType { + case sync.RemoteFs: + if err = checkRemote(loc2.Src); err != nil { + return err + } + + chain = loc2.Chain + } + + pos++ + } + + if loc1.LType == sync.TorrentFs || loc2.LType == sync.TorrentFs { + torrentCli, err = sync.NewTorrentClient(cliCtx, chain) + if err != nil { + return fmt.Errorf("can't create torrent: %w", err) + } + } + + typeValues := cliCtx.StringSlice(flags.SegTypes.Name) + snapTypes := make([]snaptype.Type, 0, len(typeValues)) + + for _, val := range typeValues { + segType, ok := snaptype.ParseFileType(val) + + if !ok { + return fmt.Errorf("unknown file type: %s", val) + } + + snapTypes = append(snapTypes, segType) + } + + var firstBlock, lastBlock uint64 + + if cliCtx.Args().Len() > pos { + firstBlock, err = strconv.ParseUint(cliCtx.Args().Get(0), 10, 64) + } + + if cliCtx.Args().Len() > 1 { + lastBlock, err = strconv.ParseUint(cliCtx.Args().Get(1), 10, 64) + } + + var session1 sync.DownloadSession + var session2 sync.DownloadSession + + if rcCli != nil { + if loc1.LType == sync.RemoteFs { + session1, err = rcCli.NewSession(cliCtx.Context, filepath.Join(tempDir, "l1"), loc1.Src+":"+loc1.Root) + + if err != nil { + return err + } + } + + if loc2.LType == sync.RemoteFs { + session2, err = rcCli.NewSession(cliCtx.Context, filepath.Join(tempDir, "l2"), loc2.Src+":"+loc2.Root) + + if err != nil { + return err + } + } + } + + if torrentCli != nil { + if loc1.LType == sync.TorrentFs { + session1 = sync.NewTorrentSession(torrentCli, chain) + } + + if loc2.LType == sync.TorrentFs { + session2 = sync.NewTorrentSession(torrentCli, chain) + } + } + + if session1 == nil { + return fmt.Errorf("no first session established") + } + + if session1 == nil { + return fmt.Errorf("no second session established") + } + + logger.Info(fmt.Sprintf("Starting compare: %s==%s", loc1.String(), loc2.String()), "first", firstBlock, "last", lastBlock, "types", snapTypes, "dir", tempDir) + + logger.Info("Reading s1 dir", "remoteFs", session1.RemoteFsRoot(), "label", session1.Label()) + files, err := sync.DownloadManifest(cliCtx.Context, session1) + + if err != nil { + files, err = session1.ReadRemoteDir(cliCtx.Context, true) + } + + if err != nil { + return err + } + + h1ents, b1ents := splitEntries(files, loc1.Version, firstBlock, lastBlock) + + logger.Info("Reading s2 dir", "remoteFs", session2.RemoteFsRoot(), "label", session2.Label()) + files, err = sync.DownloadManifest(cliCtx.Context, session2) + + if err != nil { + files, err = session2.ReadRemoteDir(cliCtx.Context, true) + } + + if err != nil { + return err + } + + h2ents, b2ents := splitEntries(files, loc2.Version, firstBlock, lastBlock) + + c := comparitor{ + chain: chain, + 
loc1: loc1, + loc2: loc2, + session1: session1, + session2: session2, + } + + var funcs []func(ctx context.Context) (time.Duration, time.Duration, time.Duration, error) + + bodyWorkers := 4 + headerWorkers := 4 + + if len(snapTypes) == 0 { + funcs = append(funcs, func(ctx context.Context) (time.Duration, time.Duration, time.Duration, error) { + return c.compareHeaders(ctx, h1ents, h2ents, headerWorkers, logger) + }, func(ctx context.Context) (time.Duration, time.Duration, time.Duration, error) { + return c.compareBodies(ctx, b1ents, b2ents, bodyWorkers, logger) + }) + } else { + for _, snapType := range snapTypes { + if snapType == snaptype.Headers { + funcs = append(funcs, func(ctx context.Context) (time.Duration, time.Duration, time.Duration, error) { + return c.compareHeaders(ctx, h1ents, h2ents, headerWorkers, logger) + }) + } + + if snapType == snaptype.Bodies { + funcs = append(funcs, func(ctx context.Context) (time.Duration, time.Duration, time.Duration, error) { + return c.compareBodies(ctx, b1ents, b2ents, bodyWorkers, logger) + }) + } + } + } + + if len(funcs) > 0 { + startTime := time.Now() + + var downloadTime uint64 + var indexTime uint64 + var compareTime uint64 + + g, ctx := errgroup.WithContext(cliCtx.Context) + g.SetLimit(len(funcs)) + + for _, f := range funcs { + func(ctx context.Context, f func(ctx context.Context) (time.Duration, time.Duration, time.Duration, error)) { + g.Go(func() error { + dt, it, ct, err := f(ctx) + + atomic.AddUint64(&downloadTime, uint64(dt)) + atomic.AddUint64(&indexTime, uint64(it)) + atomic.AddUint64(&compareTime, uint64(ct)) + + return err + }) + }(ctx, f) + } + + err = g.Wait() + + if err == nil { + logger.Info(fmt.Sprintf("Finished compare: %s==%s", loc1.String(), loc2.String()), "elapsed", time.Since(startTime), + "downloading", time.Duration(downloadTime), "indexing", time.Duration(indexTime), "comparing", time.Duration(compareTime)) + } else { + logger.Info(fmt.Sprintf("Failed compare: %s==%s", loc1.String(), loc2.String()), "err", err, "elapsed", time.Since(startTime), + "downloading", time.Duration(downloadTime), "indexing", time.Duration(indexTime), "comparing", time.Duration(compareTime)) + } + + } + return nil +} + +type BodyEntry struct { + From, To uint64 + Body, Transactions fs.DirEntry +} + +func splitEntries(files []fs.DirEntry, version uint8, firstBlock, lastBlock uint64) (hents []fs.DirEntry, bents []*BodyEntry) { + for _, ent := range files { + if info, err := ent.Info(); err == nil { + if snapInfo, ok := info.Sys().(downloader.SnapInfo); ok && snapInfo.Version() > 0 { + if version == snapInfo.Version() && + (firstBlock == 0 || snapInfo.From() >= firstBlock) && + (lastBlock == 0 || snapInfo.From() < lastBlock) { + + if snapInfo.Type() == snaptype.Headers { + hents = append(hents, ent) + } + + if snapInfo.Type() == snaptype.Bodies { + found := false + + for _, bent := range bents { + if snapInfo.From() == bent.From && + snapInfo.To() == bent.To { + bent.Body = ent + found = true + } + } + + if !found { + bents = append(bents, &BodyEntry{snapInfo.From(), snapInfo.To(), ent, nil}) + } + } + + if snapInfo.Type() == snaptype.Transactions { + found := false + + for _, bent := range bents { + if snapInfo.From() == bent.From && + snapInfo.To() == bent.To { + bent.Transactions = ent + found = true + + } + } + + if !found { + bents = append(bents, &BodyEntry{snapInfo.From(), snapInfo.To(), nil, ent}) + } + } + } + } + } + } + + return hents, bents +} + +type comparitor struct { + chain string + loc1, loc2 *sync.Locator + session1 
sync.DownloadSession + session2 sync.DownloadSession +} + +func (c comparitor) chainConfig() *chain.Config { + return params.ChainConfigByChainName(c.chain) +} + +func (c comparitor) compareHeaders(ctx context.Context, f1ents []fs.DirEntry, f2ents []fs.DirEntry, workers int, logger log.Logger) (time.Duration, time.Duration, time.Duration, error) { + var downloadTime uint64 + var compareTime uint64 + + g, ctx := errgroup.WithContext(ctx) + g.SetLimit(workers) + + for i1, ent1 := range f1ents { + var snapInfo1 downloader.SnapInfo + + if info, err := ent1.Info(); err == nil { + snapInfo1, _ = info.Sys().(downloader.SnapInfo) + } + + if snapInfo1 == nil { + continue + } + + for i2, ent2 := range f2ents { + + var snapInfo2 downloader.SnapInfo + + ent2Info, err := ent2.Info() + + if err == nil { + snapInfo2, _ = ent2Info.Sys().(downloader.SnapInfo) + } + + if snapInfo2 == nil || + snapInfo1.Type() != snapInfo2.Type() || + snapInfo1.From() != snapInfo2.From() || + snapInfo1.To() != snapInfo2.To() { + continue + } + + i1, i2, ent1, ent2 := i1, i2, ent1, ent2 + + g.Go(func() error { + g, gctx := errgroup.WithContext(ctx) + g.SetLimit(2) + + g.Go(func() error { + logger.Info(fmt.Sprintf("Downloading %s", ent1.Name()), "entry", fmt.Sprint(i1+1, "/", len(f1ents))) + startTime := time.Now() + defer func() { + atomic.AddUint64(&downloadTime, uint64(time.Since(startTime))) + }() + + err := c.session1.Download(gctx, ent1.Name()) + + if err != nil { + return err + } + + return nil + }) + + g.Go(func() error { + startTime := time.Now() + defer func() { + atomic.AddUint64(&downloadTime, uint64(time.Since(startTime))) + }() + + logger.Info(fmt.Sprintf("Downloading %s", ent2.Name()), "entry", fmt.Sprint(i2+1, "/", len(f2ents)), "size", datasize.ByteSize(ent2Info.Size())) + err := c.session2.Download(gctx, ent2.Name()) + + if err != nil { + return err + } + + return nil + }) + + if err := g.Wait(); err != nil { + return err + } + + f1snaps := freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{ + Enabled: true, + Produce: false, + NoDownloader: true, + }, c.session1.LocalFsRoot(), c.loc1.Version, logger) + + f1snaps.ReopenList([]string{ent1.Name()}, false) + + f2snaps := freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{ + Enabled: true, + Produce: false, + NoDownloader: true, + }, c.session2.LocalFsRoot(), c.loc2.Version, logger) + + f2snaps.ReopenList([]string{ent2.Name()}, false) + + err = func() error { + logger.Info(fmt.Sprintf("Comparing %s %s", ent1.Name(), ent2.Name())) + startTime := time.Now() + + defer func() { + atomic.AddUint64(&compareTime, uint64(time.Since(startTime))) + }() + + blockReader1 := freezeblocks.NewBlockReader(f1snaps, nil) + blockReader2 := freezeblocks.NewBlockReader(f2snaps, nil) + + g, gctx = errgroup.WithContext(ctx) + g.SetLimit(2) + + h2chan := make(chan *types.Header) + + g.Go(func() error { + blockReader2.HeadersRange(gctx, func(h2 *types.Header) error { + select { + case h2chan <- h2: + return nil + case <-gctx.Done(): + return gctx.Err() + } + }) + + close(h2chan) + return nil + }) + + g.Go(func() error { + err := blockReader1.HeadersRange(gctx, func(h1 *types.Header) error { + select { + case h2 := <-h2chan: + if h2 == nil { + return fmt.Errorf("header %d unknown", h1.Number.Uint64()) + } + + if h1.Number.Uint64() != h2.Number.Uint64() { + return fmt.Errorf("mismatched headers: expected %d, Got: %d", h1.Number.Uint64(), h2.Number.Uint64()) + } + + var h1buf, h2buf bytes.Buffer + + h1.EncodeRLP(&h1buf) + h2.EncodeRLP(&h2buf) + + if !bytes.Equal(h1buf.Bytes(), 
h2buf.Bytes()) { + return fmt.Errorf("%d: headers do not match", h1.Number.Uint64()) + } + + return nil + case <-gctx.Done(): + return gctx.Err() + } + }) + + return err + }) + + return g.Wait() + }() + + files := f1snaps.OpenFiles() + f1snaps.Close() + + files = append(files, f2snaps.OpenFiles()...) + f2snaps.Close() + + for _, file := range files { + os.Remove(file) + } + + return err + }) + } + } + + err := g.Wait() + + return time.Duration(downloadTime), 0, time.Duration(compareTime), err +} + +func (c comparitor) compareBodies(ctx context.Context, f1ents []*BodyEntry, f2ents []*BodyEntry, workers int, logger log.Logger) (time.Duration, time.Duration, time.Duration, error) { + var downloadTime uint64 + var indexTime uint64 + var compareTime uint64 + + g, ctx := errgroup.WithContext(ctx) + g.SetLimit(workers) + + for i1, ent1 := range f1ents { + for i2, ent2 := range f2ents { + if ent1.From != ent2.From || + ent1.To != ent2.To { + continue + } + + i1, i2, ent1, ent2 := i1, i2, ent1, ent2 + + g.Go(func() error { + g, ctx := errgroup.WithContext(ctx) + g.SetLimit(4) + + b1err := make(chan error, 1) + + g.Go(func() error { + + err := func() error { + startTime := time.Now() + + defer func() { + atomic.AddUint64(&downloadTime, uint64(time.Since(startTime))) + }() + + logger.Info(fmt.Sprintf("Downloading %s", ent1.Body.Name()), "entry", fmt.Sprint(i1+1, "/", len(f1ents))) + return c.session1.Download(ctx, ent1.Body.Name()) + }() + + b1err <- err + + if err != nil { + return fmt.Errorf("can't download %s: %w", ent1.Body.Name(), err) + } + + startTime := time.Now() + + defer func() { + atomic.AddUint64(&indexTime, uint64(time.Since(startTime))) + }() + + logger.Info(fmt.Sprintf("Indexing %s", ent1.Body.Name())) + return freezeblocks.BodiesIdx(ctx, + filepath.Join(c.session1.LocalFsRoot(), ent1.Body.Name()), ent1.From, c.session1.LocalFsRoot(), nil, log.LvlDebug, logger) + }) + + g.Go(func() error { + err := func() error { + startTime := time.Now() + + defer func() { + atomic.AddUint64(&downloadTime, uint64(time.Since(startTime))) + }() + logger.Info(fmt.Sprintf("Downloading %s", ent1.Transactions.Name()), "entry", fmt.Sprint(i1+1, "/", len(f1ents))) + return c.session1.Download(ctx, ent1.Transactions.Name()) + }() + + if err != nil { + return fmt.Errorf("can't download %s: %w", ent1.Transactions.Name(), err) + } + + select { + case <-ctx.Done(): + return ctx.Err() + case err = <-b1err: + if err != nil { + return fmt.Errorf("can't create transaction index: no bodies: %w", err) + } + } + + startTime := time.Now() + + defer func() { + atomic.AddUint64(&indexTime, uint64(time.Since(startTime))) + }() + + logger.Info(fmt.Sprintf("Indexing %s", ent1.Transactions.Name())) + return freezeblocks.TransactionsIdx(ctx, c.chainConfig(), c.loc1.Version, ent1.From, ent1.To, + c.session1.LocalFsRoot(), c.session1.LocalFsRoot(), nil, log.LvlDebug, logger) + }) + + b2err := make(chan error, 1) + + g.Go(func() error { + err := func() error { + startTime := time.Now() + + defer func() { + atomic.AddUint64(&downloadTime, uint64(time.Since(startTime))) + }() + + logger.Info(fmt.Sprintf("Downloading %s", ent2.Body.Name()), "entry", fmt.Sprint(i2+1, "/", len(f2ents))) + return c.session2.Download(ctx, ent2.Body.Name()) + }() + + b2err <- err + + if err != nil { + return fmt.Errorf("can't download %s: %w", ent2.Body.Name(), err) + } + + startTime := time.Now() + + defer func() { + atomic.AddUint64(&indexTime, uint64(time.Since(startTime))) + }() + + logger.Info(fmt.Sprintf("Indexing %s", ent2.Body.Name())) + return 
freezeblocks.BodiesIdx(ctx, + filepath.Join(c.session2.LocalFsRoot(), ent2.Body.Name()), ent2.From, c.session1.LocalFsRoot(), nil, log.LvlDebug, logger) + }) + + g.Go(func() error { + err := func() error { + startTime := time.Now() + + defer func() { + atomic.AddUint64(&downloadTime, uint64(time.Since(startTime))) + }() + logger.Info(fmt.Sprintf("Downloading %s", ent2.Transactions.Name()), "entry", fmt.Sprint(i2+1, "/", len(f2ents))) + return c.session2.Download(ctx, ent2.Transactions.Name()) + }() + + if err != nil { + return fmt.Errorf("can't download %s: %w", ent2.Transactions.Name(), err) + } + + select { + case <-ctx.Done(): + return ctx.Err() + case err = <-b2err: + if err != nil { + return fmt.Errorf("can't create transaction index: no bodies: %w", err) + } + } + + startTime := time.Now() + + defer func() { + atomic.AddUint64(&indexTime, uint64(time.Since(startTime))) + }() + + logger.Info(fmt.Sprintf("Indexing %s", ent2.Transactions.Name())) + return freezeblocks.TransactionsIdx(ctx, c.chainConfig(), c.loc2.Version, ent2.From, ent2.To, + c.session2.LocalFsRoot(), c.session2.LocalFsRoot(), nil, log.LvlDebug, logger) + }) + + if err := g.Wait(); err != nil { + return err + } + + f1snaps := freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{ + Enabled: true, + Produce: false, + NoDownloader: true, + }, c.session1.LocalFsRoot(), c.loc1.Version, logger) + + f1snaps.ReopenList([]string{ent1.Body.Name(), ent1.Transactions.Name()}, false) + + f2snaps := freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{ + Enabled: true, + Produce: false, + NoDownloader: true, + }, c.session2.LocalFsRoot(), c.loc2.Version, logger) + + f2snaps.ReopenList([]string{ent2.Body.Name(), ent2.Transactions.Name()}, false) + + err := func() error { + logger.Info(fmt.Sprintf("Comparing %s %s", ent1.Body.Name(), ent2.Body.Name())) + + startTime := time.Now() + + defer func() { + atomic.AddUint64(&compareTime, uint64(time.Since(startTime))) + }() + + blockReader1 := freezeblocks.NewBlockReader(f1snaps, nil) + blockReader2 := freezeblocks.NewBlockReader(f2snaps, nil) + + return func() error { + for i := ent1.From; i < ent1.To; i++ { + body1, err := blockReader1.BodyWithTransactions(ctx, nil, common.Hash{}, i) + + if err != nil { + return fmt.Errorf("%d: can't get body 1: %w", i, err) + } + + body2, err := blockReader2.BodyWithTransactions(ctx, nil, common.Hash{}, i) + + if err != nil { + return fmt.Errorf("%d: can't get body 2: %w", i, err) + } + + var b1buf, b2buf bytes.Buffer + + body1.EncodeRLP(&b1buf) + body2.EncodeRLP(&b2buf) + + if !bytes.Equal(b1buf.Bytes(), b2buf.Bytes()) { + return fmt.Errorf("%d: bodies do not match", i) + } + } + + return nil + }() + }() + + files := f1snaps.OpenFiles() + f1snaps.Close() + + files = append(files, f2snaps.OpenFiles()...) 
+ f2snaps.Close() + + for _, file := range files { + os.Remove(file) + } + + return err + }) + } + } + + err := g.Wait() + + return time.Duration(downloadTime), time.Duration(indexTime), time.Duration(compareTime), err +} diff --git a/cmd/snapshots/copy/copy.go b/cmd/snapshots/copy/copy.go new file mode 100644 index 00000000000..4faebc1c6bc --- /dev/null +++ b/cmd/snapshots/copy/copy.go @@ -0,0 +1,333 @@ +package copy + +import ( + "context" + "fmt" + "io/fs" + "path/filepath" + "strconv" + "strings" + + "github.com/ledgerwatch/erigon-lib/downloader" + "github.com/ledgerwatch/erigon-lib/downloader/snaptype" + "github.com/ledgerwatch/erigon/cmd/snapshots/flags" + "github.com/ledgerwatch/erigon/cmd/snapshots/sync" + "github.com/ledgerwatch/erigon/cmd/utils" + "github.com/ledgerwatch/erigon/turbo/logging" + "github.com/urfave/cli/v2" +) + +var ( + TorrentsFlag = cli.BoolFlag{ + Name: "torrents", + Usage: `Include torrent files in copy`, + Required: false, + } + + HashesFlag = cli.BoolFlag{ + Name: "hashes", + Usage: `Include hash .toml in copy`, + Required: false, + } + + ManifestFlag = cli.BoolFlag{ + Name: "manifest", + Usage: `Include mannfest .txt in copy`, + Required: false, + } + + VersionFlag = cli.IntFlag{ + Name: "version", + Usage: `File versions to copy`, + Required: false, + Value: 0, + } +) + +var Command = cli.Command{ + Action: copy, + Name: "copy", + Usage: "copy snapshot segments", + ArgsUsage: " ", + Flags: []cli.Flag{ + &VersionFlag, + &flags.SegTypes, + &TorrentsFlag, + &HashesFlag, + &ManifestFlag, + &utils.DataDirFlag, + &logging.LogVerbosityFlag, + &logging.LogConsoleVerbosityFlag, + &logging.LogDirVerbosityFlag, + &utils.WebSeedsFlag, + &utils.NATFlag, + &utils.DisableIPV6, + &utils.DisableIPV4, + &utils.TorrentDownloadRateFlag, + &utils.TorrentUploadRateFlag, + &utils.TorrentVerbosityFlag, + &utils.TorrentPortFlag, + &utils.TorrentMaxPeersFlag, + &utils.TorrentConnsPerFileFlag, + }, + Description: ``, +} + +func copy(cliCtx *cli.Context) error { + logger := sync.Logger(cliCtx.Context) + + logger.Info("Starting copy") + + var src, dst *sync.Locator + var err error + + var rcCli *downloader.RCloneClient + var torrentCli *sync.TorrentClient + + pos := 0 + + if cliCtx.Args().Len() > pos { + val := cliCtx.Args().Get(pos) + + if src, err = sync.ParseLocator(val); err != nil { + return err + } + } + + pos++ + + if cliCtx.Args().Len() > pos { + val := cliCtx.Args().Get(pos) + + if src, err = sync.ParseLocator(val); err != nil { + return err + } + + pos++ + } + + switch dst.LType { + case sync.TorrentFs: + return fmt.Errorf("can't copy to torrent - need intermediate local fs") + + case sync.RemoteFs: + if rcCli == nil { + rcCli, err = downloader.NewRCloneClient(logger) + + if err != nil { + return err + } + } + + if err = sync.CheckRemote(rcCli, src.Src); err != nil { + return err + } + } + + switch src.LType { + case sync.TorrentFs: + torrentCli, err = sync.NewTorrentClient(cliCtx, dst.Chain) + if err != nil { + return fmt.Errorf("can't create torrent: %w", err) + } + + case sync.RemoteFs: + if rcCli == nil { + rcCli, err = downloader.NewRCloneClient(logger) + + if err != nil { + return err + } + } + + if err = sync.CheckRemote(rcCli, src.Src); err != nil { + return err + } + } + + typeValues := cliCtx.StringSlice(flags.SegTypes.Name) + snapTypes := make([]snaptype.Type, 0, len(typeValues)) + + for _, val := range typeValues { + segType, ok := snaptype.ParseFileType(val) + + if !ok { + return fmt.Errorf("unknown file type: %s", val) + } + + snapTypes = append(snapTypes, 
segType) + } + + torrents := cliCtx.Bool(TorrentsFlag.Name) + hashes := cliCtx.Bool(HashesFlag.Name) + manifest := cliCtx.Bool(ManifestFlag.Name) + + var firstBlock, lastBlock uint64 + + version := cliCtx.Int(VersionFlag.Name) + + if version != 0 { + dst.Version = uint8(version) + } + + if cliCtx.Args().Len() > pos { + if firstBlock, err = strconv.ParseUint(cliCtx.Args().Get(pos), 10, 64); err != nil { + return err + } + + pos++ + } + + if cliCtx.Args().Len() > pos { + if lastBlock, err = strconv.ParseUint(cliCtx.Args().Get(pos), 10, 64); err != nil { + return err + } + } + + switch src.LType { + case sync.LocalFs: + switch dst.LType { + case sync.LocalFs: + return localToLocal(src, dst, firstBlock, lastBlock, snapTypes, torrents, hashes, manifest) + case sync.RemoteFs: + return localToRemote(rcCli, src, dst, firstBlock, lastBlock, snapTypes, torrents, hashes, manifest) + default: + return fmt.Errorf("unhandled torrent destination: %s", dst) + } + + case sync.RemoteFs: + switch dst.LType { + case sync.LocalFs: + return remoteToLocal(cliCtx.Context, rcCli, src, dst, firstBlock, lastBlock, snapTypes, torrents, hashes, manifest) + case sync.RemoteFs: + return remoteToRemote(rcCli, src, dst, firstBlock, lastBlock, snapTypes, torrents, hashes, manifest) + default: + return fmt.Errorf("unhandled torrent destination: %s", dst) + } + + case sync.TorrentFs: + switch dst.LType { + case sync.LocalFs: + return torrentToLocal(torrentCli, src, dst, firstBlock, lastBlock, snapTypes, torrents, hashes, manifest) + case sync.RemoteFs: + return torrentToRemote(torrentCli, rcCli, src, dst, firstBlock, lastBlock, snapTypes, torrents, hashes, manifest) + default: + return fmt.Errorf("unhandled torrent destination: %s", dst) + } + + } + return nil +} + +func torrentToLocal(torrentCli *sync.TorrentClient, src *sync.Locator, dst *sync.Locator, from uint64, to uint64, snapTypes []snaptype.Type, torrents, hashes, manifest bool) error { + return fmt.Errorf("TODO") +} + +func torrentToRemote(torrentCli *sync.TorrentClient, rcCli *downloader.RCloneClient, src *sync.Locator, dst *sync.Locator, from uint64, to uint64, snapTypes []snaptype.Type, torrents, hashes, manifest bool) error { + return fmt.Errorf("TODO") +} + +func localToRemote(rcCli *downloader.RCloneClient, src *sync.Locator, dst *sync.Locator, from uint64, to uint64, snapTypes []snaptype.Type, torrents, hashes, manifest bool) error { + return fmt.Errorf("TODO") +} + +func localToLocal(src *sync.Locator, dst *sync.Locator, from uint64, to uint64, snapTypes []snaptype.Type, torrents, hashes, manifest bool) error { + return fmt.Errorf("TODO") +} + +func remoteToLocal(ctx context.Context, rcCli *downloader.RCloneClient, src *sync.Locator, dst *sync.Locator, from uint64, to uint64, snapTypes []snaptype.Type, torrents, hashes, manifest bool) error { + logger := sync.Logger(ctx) + + if rcCli == nil { + return fmt.Errorf("no remote downloader") + } + + session, err := rcCli.NewSession(ctx, dst.Root, src.Src+":"+src.Root) + + if err != nil { + return err + } + + logger.Info("Reading src dir", "remoteFs", session.RemoteFsRoot(), "label", session.Label()) + fileEntries, err := session.ReadRemoteDir(ctx, true) + + if err != nil { + return err + } + + files := selectFiles(fileEntries, dst.Version, from, to, snapTypes, torrents, hashes, manifest) + + logger.Info(fmt.Sprintf("Downloading %s", files)) + + return session.Download(ctx, files...) 
+} + +func remoteToRemote(rcCli *downloader.RCloneClient, src *sync.Locator, dst *sync.Locator, from uint64, to uint64, snapTypes []snaptype.Type, torrents, hashes, manifest bool) error { + return fmt.Errorf("TODO") +} + +type sinf struct { + snaptype.FileInfo +} + +func (i sinf) Version() uint8 { + return i.FileInfo.Version +} + +func (i sinf) From() uint64 { + return i.FileInfo.From +} + +func (i sinf) To() uint64 { + return i.FileInfo.To +} + +func (i sinf) Type() snaptype.Type { + return i.FileInfo.T +} + +func selectFiles(entries []fs.DirEntry, version uint8, firstBlock, lastBlock uint64, snapTypes []snaptype.Type, torrents, hashes, manifest bool) []string { + var files []string + + for _, ent := range entries { + if info, err := ent.Info(); err == nil { + snapInfo, _ := info.Sys().(downloader.SnapInfo) + + if torrents { + if ext := filepath.Ext(info.Name()); ext == ".torrent" { + fileName := strings.TrimSuffix(info.Name(), ".torrent") + + if fileInfo, ok := snaptype.ParseFileName("", fileName); ok { + snapInfo = sinf{fileInfo} + } + } + } + + switch { + case snapInfo != nil && snapInfo.Type() != snaptype.Unknown: + if (version == 0 || version == snapInfo.Version()) && + (firstBlock == 0 || snapInfo.From() >= firstBlock) && + (lastBlock == 0 || snapInfo.From() < lastBlock) { + + if len(snapTypes) == 0 { + files = append(files, info.Name()) + } else { + for _, snapType := range snapTypes { + if snapType == snapInfo.Type() { + files = append(files, info.Name()) + break + } + } + } + } + + case manifest: + + case hashes: + + } + } + } + + return files +} diff --git a/cmd/snapshots/flags/flags.go b/cmd/snapshots/flags/flags.go new file mode 100644 index 00000000000..b905ffa1cc0 --- /dev/null +++ b/cmd/snapshots/flags/flags.go @@ -0,0 +1,11 @@ +package flags + +import "github.com/urfave/cli/v2" + +var ( + SegTypes = cli.StringSliceFlag{ + Name: "types", + Usage: `Segment types to comparre with optional e.g. headers,bodies,transactions`, + Required: false, + } +) diff --git a/cmd/snapshots/main.go b/cmd/snapshots/main.go new file mode 100644 index 00000000000..47e2f447616 --- /dev/null +++ b/cmd/snapshots/main.go @@ -0,0 +1,112 @@ +package main + +import ( + "context" + "fmt" + "os" + "os/signal" + "path/filepath" + "syscall" + + "github.com/ledgerwatch/erigon/cmd/snapshots/cmp" + "github.com/ledgerwatch/erigon/cmd/snapshots/copy" + "github.com/ledgerwatch/erigon/cmd/snapshots/manifest" + "github.com/ledgerwatch/erigon/cmd/snapshots/sync" + "github.com/ledgerwatch/erigon/cmd/snapshots/torrents" + "github.com/ledgerwatch/erigon/cmd/snapshots/verify" + "github.com/ledgerwatch/erigon/cmd/utils" + "github.com/ledgerwatch/erigon/params" + "github.com/ledgerwatch/erigon/turbo/debug" + "github.com/ledgerwatch/erigon/turbo/logging" + "github.com/ledgerwatch/log/v3" + "github.com/urfave/cli/v2" +) + +func main() { + logging.LogVerbosityFlag.Value = log.LvlError.String() + logging.LogConsoleVerbosityFlag.Value = log.LvlError.String() + + app := cli.NewApp() + app.Name = "snapshots" + app.Version = params.VersionWithCommit(params.GitCommit) + + app.Commands = []*cli.Command{ + &cmp.Command, + ©.Command, + &verify.Command, + &torrents.Command, + &manifest.Command, + } + + app.Flags = []cli.Flag{} + + app.UsageText = app.Name + ` [command] [flags]` + + app.Action = func(context *cli.Context) error { + if context.Args().Present() { + var goodNames []string + for _, c := range app.VisibleCommands() { + goodNames = append(goodNames, c.Name) + } + _, _ = fmt.Fprintf(os.Stderr, "Command '%s' not found. 
Available commands: %s\n", context.Args().First(), goodNames) + cli.ShowAppHelpAndExit(context, 1) + } + + return nil + } + + for _, command := range app.Commands { + command.Before = func(ctx *cli.Context) error { + debug.RaiseFdLimit() + + logger, err := setupLogger(ctx) + + if err != nil { + return err + } + + var cancel context.CancelFunc + + ctx.Context, cancel = context.WithCancel(sync.WithLogger(ctx.Context, logger)) + + go handleTerminationSignals(cancel, logger) + + return nil + } + } + + if err := app.Run(os.Args); err != nil { + _, _ = fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } +} + +func setupLogger(ctx *cli.Context) (log.Logger, error) { + dataDir := ctx.String(utils.DataDirFlag.Name) + + if len(dataDir) > 0 { + logsDir := filepath.Join(dataDir, "logs") + + if err := os.MkdirAll(logsDir, 0755); err != nil { + return nil, err + } + } + + logger := logging.SetupLoggerCtx("snapshots-"+ctx.Command.Name, ctx, log.LvlError, log.LvlInfo, false) + + return logger, nil +} + +func handleTerminationSignals(stopFunc func(), logger log.Logger) { + signalCh := make(chan os.Signal, 1) + signal.Notify(signalCh, syscall.SIGTERM, syscall.SIGINT) + + switch s := <-signalCh; s { + case syscall.SIGTERM: + logger.Info("Stopping") + stopFunc() + case syscall.SIGINT: + logger.Info("Terminating") + os.Exit(-int(syscall.SIGINT)) + } +} diff --git a/cmd/snapshots/manifest/manifest.go b/cmd/snapshots/manifest/manifest.go new file mode 100644 index 00000000000..54e803fb0c2 --- /dev/null +++ b/cmd/snapshots/manifest/manifest.go @@ -0,0 +1,365 @@ +package manifest + +import ( + "bufio" + "bytes" + "context" + "fmt" + "io/fs" + "os" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/ledgerwatch/erigon-lib/downloader" + "github.com/ledgerwatch/erigon-lib/downloader/snaptype" + "github.com/ledgerwatch/erigon/cmd/snapshots/sync" + "github.com/ledgerwatch/erigon/cmd/utils" + "github.com/ledgerwatch/erigon/turbo/logging" + "github.com/urfave/cli/v2" +) + +var ( + VersionFlag = cli.IntFlag{ + Name: "version", + Usage: `Manifest file versions`, + Required: false, + Value: 0, + } +) + +var Command = cli.Command{ + Action: func(cliCtx *cli.Context) error { + return manifest(cliCtx, "list") + }, + Name: "manifest", + Usage: "manifest utilities", + Subcommands: []*cli.Command{ + { + Action: func(cliCtx *cli.Context) error { + return manifest(cliCtx, "list") + }, + Name: "list", + Usage: "list manifest from storage location", + ArgsUsage: "", + }, + { + Action: func(cliCtx *cli.Context) error { + return manifest(cliCtx, "update") + }, + Name: "update", + Usage: "update the manifest to match the files available at its storage location", + ArgsUsage: "", + }, + { + Action: func(cliCtx *cli.Context) error { + return manifest(cliCtx, "verify") + }, + Name: "verify", + Usage: "verify that manifest matches the files available at its storage location", + ArgsUsage: "", + }, + }, + Flags: []cli.Flag{ + &VersionFlag, + &utils.DataDirFlag, + &logging.LogVerbosityFlag, + &logging.LogConsoleVerbosityFlag, + &logging.LogDirVerbosityFlag, + }, + Description: ``, +} + +func manifest(cliCtx *cli.Context, command string) error { + logger := sync.Logger(cliCtx.Context) + + var src *sync.Locator + var err error + + var rcCli *downloader.RCloneClient + + pos := 0 + + if cliCtx.Args().Len() == 0 { + return fmt.Errorf("missing manifest location") + } + + arg := cliCtx.Args().Get(pos) + + if src, err = sync.ParseLocator(arg); err != nil { + return err + } + + switch src.LType { + case sync.RemoteFs: + if rcCli == nil { + 
rcCli, err = downloader.NewRCloneClient(logger) + + if err != nil { + return err + } + } + + if err = sync.CheckRemote(rcCli, src.Src); err != nil { + return err + } + } + + var srcSession *downloader.RCloneSession + + tempDir, err := os.MkdirTemp("", "snapshot-manifest-") + + if err != nil { + return err + } + + defer os.RemoveAll(tempDir) + + if rcCli != nil { + if src != nil && src.LType == sync.RemoteFs { + srcSession, err = rcCli.NewSession(cliCtx.Context, tempDir, src.Src+":"+src.Root) + + if err != nil { + return err + } + } + } + + if src != nil && srcSession == nil { + return fmt.Errorf("no src session established") + } + + logger.Debug("Starting manifest " + command) + + var version *uint8 + + if val := cliCtx.Int(VersionFlag.Name); val != 0 { + v := uint8(val) + version = &v + } + + switch command { + case "update": + return updateManifest(cliCtx.Context, tempDir, srcSession, version) + case "verify": + return verifyManifest(cliCtx.Context, srcSession, version, os.Stdout) + default: + return listManifest(cliCtx.Context, srcSession, os.Stdout) + } +} + +func listManifest(ctx context.Context, srcSession *downloader.RCloneSession, out *os.File) error { + entries, err := DownloadManifest(ctx, srcSession) + + if err != nil { + return err + } + + for _, fi := range entries { + fmt.Fprintln(out, fi.Name()) + } + + return nil +} + +func updateManifest(ctx context.Context, tmpDir string, srcSession *downloader.RCloneSession, version *uint8) error { + entities, err := srcSession.ReadRemoteDir(ctx, true) + + if err != nil { + return err + } + + manifestFile := "manifest.txt" + + fileMap := map[string]string{} + torrentMap := map[string]string{} + + for _, fi := range entities { + var file string + var files map[string]string + + if filepath.Ext(fi.Name()) == ".torrent" { + file = strings.TrimSuffix(fi.Name(), ".torrent") + files = torrentMap + } else { + file = fi.Name() + files = fileMap + } + + info, ok := snaptype.ParseFileName("", file) + + if !ok || (version != nil && *version != info.Version) { + continue + } + + files[file] = fi.Name() + } + + var files []string + + for file := range fileMap { + if torrent, ok := torrentMap[file]; ok { + files = append(files, file, torrent) + } + } + + sort.Strings(files) + + manifestEntries := bytes.Buffer{} + + for _, file := range files { + fmt.Fprintln(&manifestEntries, file) + } + + _ = os.WriteFile(filepath.Join(tmpDir, manifestFile), manifestEntries.Bytes(), 0644) + defer os.Remove(filepath.Join(tmpDir, manifestFile)) + + return srcSession.Upload(ctx, manifestFile) +} + +func verifyManifest(ctx context.Context, srcSession *downloader.RCloneSession, version *uint8, out *os.File) error { + manifestEntries, err := DownloadManifest(ctx, srcSession) + + if err != nil { + return fmt.Errorf("verification failed: can't read manifest: %w", err) + } + + manifestFiles := map[string]struct{}{} + + for _, fi := range manifestEntries { + var file string + + if filepath.Ext(fi.Name()) == ".torrent" { + file = strings.TrimSuffix(fi.Name(), ".torrent") + } else { + file = fi.Name() + } + + info, ok := snaptype.ParseFileName("", file) + + if !ok || (version != nil && *version != info.Version) { + continue + } + + manifestFiles[fi.Name()] = struct{}{} + } + + dirEntries, err := srcSession.ReadRemoteDir(ctx, true) + + if err != nil { + return fmt.Errorf("verification failed: can't read dir: %w", err) + } + + dirFiles := map[string]struct{}{} + + for _, fi := range dirEntries { + + var file string + + if filepath.Ext(fi.Name()) == ".torrent" { + file = 
strings.TrimSuffix(fi.Name(), ".torrent") + } else { + file = fi.Name() + } + + info, ok := snaptype.ParseFileName("", file) + + if !ok || (version != nil && *version != info.Version) { + continue + } + + if _, ok := manifestFiles[fi.Name()]; ok { + delete(manifestFiles, fi.Name()) + } else { + dirFiles[fi.Name()] = struct{}{} + } + } + + var missing string + var extra string + + if len(manifestFiles) != 0 { + files := make([]string, len(manifestFiles)) + + for file := range manifestFiles { + files = append(files, file) + } + + missing = fmt.Sprintf(": manifest files not in src: %s", files) + } + + if len(dirFiles) != 0 { + files := make([]string, len(dirFiles)) + + for file := range dirFiles { + files = append(files, file) + } + + extra = fmt.Sprintf(": src files not in manifest: %s", files) + } + + if len(missing) > 0 || len(extra) != 0 { + return fmt.Errorf("manifest does not match src contents%s%s", missing, extra) + } + return nil +} + +type dirEntry struct { + name string +} + +func (e dirEntry) Name() string { + return e.name +} + +func (e dirEntry) IsDir() bool { + return false +} + +func (e dirEntry) Type() fs.FileMode { + return e.Mode() +} + +func (e dirEntry) Size() int64 { + return -1 +} + +func (e dirEntry) Mode() fs.FileMode { + return fs.ModeIrregular +} + +func (e dirEntry) ModTime() time.Time { + return time.Time{} +} + +func (e dirEntry) Sys() any { + return nil +} + +func (e dirEntry) Info() (fs.FileInfo, error) { + return e, nil +} + +func DownloadManifest(ctx context.Context, session *downloader.RCloneSession) ([]fs.DirEntry, error) { + + reader, err := session.Cat(ctx, "manifest.txt") + + if err != nil { + return nil, err + } + + var entries []fs.DirEntry + + scanner := bufio.NewScanner(reader) + + for scanner.Scan() { + entries = append(entries, dirEntry{scanner.Text()}) + } + + if err := scanner.Err(); err != nil { + return nil, err + } + + return entries, nil +} diff --git a/cmd/snapshots/sync/context.go b/cmd/snapshots/sync/context.go new file mode 100644 index 00000000000..fce2de1215c --- /dev/null +++ b/cmd/snapshots/sync/context.go @@ -0,0 +1,38 @@ +package sync + +import ( + "context" + + "github.com/ledgerwatch/log/v3" +) + +type ctxKey int + +const ( + ckLogger ctxKey = iota + ckTempDir +) + +func WithLogger(ctx context.Context, logger log.Logger) context.Context { + return context.WithValue(ctx, ckLogger, logger) +} + +func Logger(ctx context.Context) log.Logger { + if logger, ok := ctx.Value(ckLogger).(log.Logger); ok { + return logger + } + + return log.Root() +} + +func WithTempDir(ctx context.Context, tempDir string) context.Context { + return context.WithValue(ctx, ckTempDir, tempDir) +} + +func TempDir(ctx context.Context) string { + if tempDir, ok := ctx.Value(ckTempDir).(string); ok { + return tempDir + } + + return "" +} diff --git a/cmd/snapshots/sync/sync.go b/cmd/snapshots/sync/sync.go new file mode 100644 index 00000000000..c01626f0678 --- /dev/null +++ b/cmd/snapshots/sync/sync.go @@ -0,0 +1,444 @@ +package sync + +import ( + "bufio" + "context" + "fmt" + "io/fs" + "os" + "path/filepath" + "regexp" + "runtime" + "strconv" + "strings" + "time" + + "github.com/anacrolix/torrent" + "github.com/anacrolix/torrent/metainfo" + "github.com/anacrolix/torrent/storage" + "github.com/c2h5oh/datasize" + "github.com/ledgerwatch/erigon-lib/chain/snapcfg" + "github.com/ledgerwatch/erigon-lib/common" + "github.com/ledgerwatch/erigon-lib/common/datadir" + "github.com/ledgerwatch/erigon-lib/downloader" + 
"github.com/ledgerwatch/erigon-lib/downloader/downloadercfg" + "github.com/ledgerwatch/erigon-lib/downloader/snaptype" + "github.com/ledgerwatch/erigon/cmd/downloader/downloadernat" + "github.com/ledgerwatch/erigon/cmd/utils" + "github.com/ledgerwatch/erigon/p2p/nat" + "github.com/ledgerwatch/erigon/params" + "github.com/urfave/cli/v2" + "golang.org/x/exp/slices" + "golang.org/x/sync/errgroup" +) + +type LType int + +const ( + TorrentFs LType = iota + LocalFs + RemoteFs +) + +type Locator struct { + LType LType + Src string + Root string + Version uint8 + Chain string +} + +func (l Locator) String() string { + var val string + + switch l.LType { + case TorrentFs: + val = "torrent" + case LocalFs: + val = l.Root + case RemoteFs: + val = l.Src + ":" + l.Root + } + + if l.Version > 0 { + val += fmt.Sprint(":v", l.Version) + } + + return val +} + +var locatorExp, _ = regexp.Compile(`^(?:(\w+)\:)?([^\:]*)(?:\:(v\d+))?`) +var srcExp, _ = regexp.Compile(`^erigon-v\d+-snapshots-(.*)$`) + +func ParseLocator(value string) (*Locator, error) { + if matches := locatorExp.FindStringSubmatch(value); len(matches) > 0 { + var loc Locator + + switch { + case matches[1] == "torrent": + loc.LType = TorrentFs + + if len(matches[2]) > 0 { + version, err := strconv.ParseUint(matches[2][1:], 10, 8) + if err != nil { + return nil, fmt.Errorf("can't parse version: %s: %w", matches[3], err) + } + + loc.Version = uint8(version) + } + + case len(matches[1]) > 0: + loc.LType = RemoteFs + loc.Src = matches[1] + loc.Root = matches[2] + + if matches := srcExp.FindStringSubmatch(loc.Root); len(matches) > 1 { + loc.Chain = matches[1] + } + + if len(matches[3]) > 0 { + version, err := strconv.ParseUint(matches[3][1:], 10, 8) + if err != nil { + return nil, fmt.Errorf("can't parse version: %s: %w", matches[3], err) + } + + loc.Version = uint8(version) + } + + default: + loc.LType = LocalFs + loc.Root = downloader.Clean(matches[2]) + } + + return &loc, nil + } + + if path, err := filepath.Abs(value); err == nil { + return &Locator{ + LType: LocalFs, + Root: path, + }, nil + } + + return nil, fmt.Errorf("Invalid locator syntax") +} + +type TorrentClient struct { + *torrent.Client + cfg *torrent.ClientConfig +} + +func NewTorrentClient(cliCtx *cli.Context, chain string) (*TorrentClient, error) { + logger := Logger(cliCtx.Context) + tempDir := TempDir(cliCtx.Context) + + torrentDir := filepath.Join(tempDir, "torrents", chain) + + dirs := datadir.New(torrentDir) + + webseedsList := common.CliString2Array(cliCtx.String(utils.WebSeedsFlag.Name)) + + if known, ok := snapcfg.KnownWebseeds[chain]; ok { + webseedsList = append(webseedsList, known...) 
+ } + + var downloadRate, uploadRate datasize.ByteSize + + if err := downloadRate.UnmarshalText([]byte(cliCtx.String(utils.TorrentDownloadRateFlag.Name))); err != nil { + return nil, err + } + + if err := uploadRate.UnmarshalText([]byte(cliCtx.String(utils.TorrentUploadRateFlag.Name))); err != nil { + return nil, err + } + + logLevel, _, err := downloadercfg.Int2LogLevel(cliCtx.Int(utils.TorrentVerbosityFlag.Name)) + + if err != nil { + return nil, err + } + + version := "erigon: " + params.VersionWithCommit(params.GitCommit) + + cfg, err := downloadercfg.New(dirs, version, logLevel, downloadRate, uploadRate, + cliCtx.Int(utils.TorrentPortFlag.Name), + cliCtx.Int(utils.TorrentConnsPerFileFlag.Name), 0, nil, webseedsList, chain) + + if err != nil { + return nil, err + } + + err = os.RemoveAll(torrentDir) + + if err != nil { + return nil, fmt.Errorf("can't clean torrent dir: %w", err) + } + + if err := os.MkdirAll(torrentDir, 0755); err != nil { + return nil, err + } + + cfg.ClientConfig.DataDir = torrentDir + + cfg.ClientConfig.PieceHashersPerTorrent = 32 * runtime.NumCPU() + cfg.ClientConfig.DisableIPv6 = cliCtx.Bool(utils.DisableIPV6.Name) + cfg.ClientConfig.DisableIPv4 = cliCtx.Bool(utils.DisableIPV4.Name) + + natif, err := nat.Parse(utils.NATFlag.Value) + + if err != nil { + return nil, fmt.Errorf("invalid nat option %s: %w", utils.NATFlag.Value, err) + } + + downloadernat.DoNat(natif, cfg.ClientConfig, logger) + + cfg.ClientConfig.DefaultStorage = storage.NewMMap(torrentDir) + + cli, err := torrent.NewClient(cfg.ClientConfig) + + if err != nil { + return nil, fmt.Errorf("can't create torrent client: %w", err) + } + + return &TorrentClient{cli, cfg.ClientConfig}, nil +} + +type torrentSession struct { + cli *TorrentClient + items map[string]snapcfg.PreverifiedItem +} + +type fileInfo struct { + info snapcfg.PreverifiedItem +} + +func (fi *fileInfo) Name() string { + return fi.info.Name +} + +func (fi *fileInfo) Size() int64 { + return 0 +} + +func (fi *fileInfo) Mode() fs.FileMode { + return fs.ModeIrregular +} + +func (fi *fileInfo) ModTime() time.Time { + return time.Time{} +} + +func (fi *fileInfo) IsDir() bool { + return false +} + +type torrentInfo struct { + snapInfo *snaptype.FileInfo + hash string +} + +func (i *torrentInfo) Version() uint8 { + if i.snapInfo != nil { + return i.snapInfo.Version + } + + return 0 +} + +func (i *torrentInfo) From() uint64 { + if i.snapInfo != nil { + return i.snapInfo.From + } + + return 0 +} + +func (i *torrentInfo) To() uint64 { + if i.snapInfo != nil { + return i.snapInfo.To + } + + return 0 +} + +func (i *torrentInfo) Type() snaptype.Type { + if i.snapInfo != nil { + return i.snapInfo.T + } + + return 0 +} + +func (i *torrentInfo) Hash() string { + return i.hash +} + +func (fi *fileInfo) Sys() any { + info := torrentInfo{hash: fi.info.Hash} + if snapInfo, ok := snaptype.ParseFileName("", fi.Name()); ok { + info.snapInfo = &snapInfo + } + + return &info +} + +type dirEntry struct { + info *fileInfo +} + +func (e dirEntry) Name() string { + return e.info.Name() +} + +func (e dirEntry) IsDir() bool { + return e.info.IsDir() +} + +func (e dirEntry) Type() fs.FileMode { + return fs.ModeIrregular +} + +func (e dirEntry) Info() (fs.FileInfo, error) { + return e.info, nil +} + +func (s *torrentSession) ReadRemoteDir(ctx context.Context, refresh bool) ([]fs.DirEntry, error) { + var entries = make([]fs.DirEntry, 0, len(s.items)) + + for _, info := range s.items { + entries = append(entries, &dirEntry{&fileInfo{info}}) + } + + slices.SortFunc(entries, 
func(a, b fs.DirEntry) int { + return strings.Compare(a.Name(), b.Name()) + }) + + return entries, nil +} + +func (s *torrentSession) LocalFsRoot() string { + return s.cli.cfg.DataDir +} + +func (s *torrentSession) RemoteFsRoot() string { + return "" +} + +func (s *torrentSession) Download(ctx context.Context, files ...string) error { + g, ctx := errgroup.WithContext(ctx) + g.SetLimit(len(files)) + + for _, f := range files { + file := f + + g.Go(func() error { + it, ok := s.items[file] + + if !ok { + return fs.ErrNotExist + } + + t, err := func() (*torrent.Torrent, error) { + infoHash := snaptype.Hex2InfoHash(it.Hash) + + for _, t := range s.cli.Torrents() { + if t.Name() == file { + return t, nil + } + } + + mi := &metainfo.MetaInfo{AnnounceList: downloader.Trackers} + magnet := mi.Magnet(&infoHash, &metainfo.Info{Name: file}) + spec, err := torrent.TorrentSpecFromMagnetUri(magnet.String()) + + if err != nil { + return nil, err + } + + spec.DisallowDataDownload = true + + t, _, err := s.cli.AddTorrentSpec(spec) + if err != nil { + return nil, err + } + + return t, nil + }() + + if err != nil { + return err + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-t.GotInfo(): + } + + if !t.Complete.Bool() { + t.AllowDataDownload() + t.DownloadAll() + select { + case <-ctx.Done(): + return ctx.Err() + case <-t.Complete.On(): + } + } + + closed := t.Closed() + t.Drop() + <-closed + + return nil + }) + } + + return g.Wait() +} + +func (s *torrentSession) Label() string { + return "torrents" +} + +func NewTorrentSession(cli *TorrentClient, chain string) *torrentSession { + session := &torrentSession{cli, map[string]snapcfg.PreverifiedItem{}} + for _, it := range snapcfg.KnownCfg(chain, 0).Preverified { + session.items[it.Name] = it + } + + return session +} + +func DownloadManifest(ctx context.Context, session DownloadSession) ([]fs.DirEntry, error) { + if session, ok := session.(*downloader.RCloneSession); ok { + reader, err := session.Cat(ctx, "manifest.txt") + + if err != nil { + return nil, err + } + + var entries []fs.DirEntry + + scanner := bufio.NewScanner(reader) + + for scanner.Scan() { + entries = append(entries, dirEntry{&fileInfo{snapcfg.PreverifiedItem{Name: scanner.Text()}}}) + } + + if err := scanner.Err(); err != nil { + return nil, err + } + + return entries, nil + } + + return nil, fmt.Errorf("not implemented for %T", session) +} + +type DownloadSession interface { + Download(ctx context.Context, files ...string) error + ReadRemoteDir(ctx context.Context, refresh bool) ([]fs.DirEntry, error) + LocalFsRoot() string + RemoteFsRoot() string + Label() string +} diff --git a/cmd/snapshots/sync/util.go b/cmd/snapshots/sync/util.go new file mode 100644 index 00000000000..a0a69547bd6 --- /dev/null +++ b/cmd/snapshots/sync/util.go @@ -0,0 +1,32 @@ +package sync + +import ( + "context" + "fmt" + + "github.com/ledgerwatch/erigon-lib/downloader" +) + +func CheckRemote(rcCli *downloader.RCloneClient, src string) error { + + remotes, err := rcCli.ListRemotes(context.Background()) + + if err != nil { + return err + } + + hasRemote := false + + for _, remote := range remotes { + if src == remote { + hasRemote = true + break + } + } + + if !hasRemote { + return fmt.Errorf("unknown remote: %s", src) + } + + return nil +} diff --git a/cmd/snapshots/torrents/torrents.go b/cmd/snapshots/torrents/torrents.go new file mode 100644 index 00000000000..01f01ab6e14 --- /dev/null +++ b/cmd/snapshots/torrents/torrents.go @@ -0,0 +1,504 @@ +package torrents + +import ( + "context" + "fmt" + "os" 
+ "path/filepath" + "strconv" + "strings" + gosync "sync" + "time" + + "golang.org/x/exp/slices" + + "github.com/ledgerwatch/log/v3" + + "github.com/anacrolix/torrent/metainfo" + "github.com/ledgerwatch/erigon-lib/downloader" + "github.com/ledgerwatch/erigon-lib/downloader/snaptype" + "github.com/ledgerwatch/erigon/cmd/snapshots/manifest" + "github.com/ledgerwatch/erigon/cmd/snapshots/sync" + "github.com/ledgerwatch/erigon/cmd/utils" + "github.com/ledgerwatch/erigon/turbo/logging" + "github.com/urfave/cli/v2" + "golang.org/x/sync/errgroup" +) + +var Command = cli.Command{ + Action: func(cliCtx *cli.Context) error { + return torrents(cliCtx, "list") + }, + Name: "torrent", + Usage: "torrent utilities", + Subcommands: []*cli.Command{ + { + Action: func(cliCtx *cli.Context) error { + return torrents(cliCtx, "list") + }, + Name: "list", + Usage: "list torrents available at the specified storage location", + ArgsUsage: "", + }, + { + Action: func(cliCtx *cli.Context) error { + return torrents(cliCtx, "hashes") + }, + Name: "hashes", + Usage: "list the hashes (in toml format) at the specified storage location", + ArgsUsage: " ", + }, + { + Action: func(cliCtx *cli.Context) error { + return torrents(cliCtx, "update") + }, + Name: "update", + Usage: "update re-create the torrents for the contents available at its storage location", + ArgsUsage: " ", + }, + { + Action: func(cliCtx *cli.Context) error { + return torrents(cliCtx, "verify") + }, + Name: "verify", + Usage: "verify that manifest contents are available at its storage location", + ArgsUsage: " ", + }, + }, + Flags: []cli.Flag{ + &utils.DataDirFlag, + &logging.LogVerbosityFlag, + &logging.LogConsoleVerbosityFlag, + &logging.LogDirVerbosityFlag, + }, + Description: ``, +} + +func torrents(cliCtx *cli.Context, command string) error { + logger := sync.Logger(cliCtx.Context) + + var src *sync.Locator + var err error + + var firstBlock, lastBlock uint64 + + pos := 0 + + if src, err = sync.ParseLocator(cliCtx.Args().Get(pos)); err != nil { + return err + } + + pos++ + + if cliCtx.Args().Len() > pos { + if src, err = sync.ParseLocator(cliCtx.Args().Get(pos)); err != nil { + return err + } + + if err != nil { + return err + } + } + + pos++ + + if cliCtx.Args().Len() > pos { + firstBlock, err = strconv.ParseUint(cliCtx.Args().Get(pos), 10, 64) + if err != nil { + return err + } + } + + pos++ + + if cliCtx.Args().Len() > pos { + lastBlock, err = strconv.ParseUint(cliCtx.Args().Get(pos), 10, 64) + + if err != nil { + return err + } + } + + if src == nil { + return fmt.Errorf("missing data source") + } + + var rcCli *downloader.RCloneClient + + switch src.LType { + case sync.RemoteFs: + if rcCli == nil { + rcCli, err = downloader.NewRCloneClient(logger) + + if err != nil { + return err + } + } + + if err = sync.CheckRemote(rcCli, src.Src); err != nil { + return err + } + } + + var srcSession *downloader.RCloneSession + + dataDir := cliCtx.String(utils.DataDirFlag.Name) + var tempDir string + + if len(dataDir) == 0 { + dataDir, err := os.MkdirTemp("", "snapshot-torrents-") + if err != nil { + return err + } + tempDir = dataDir + defer os.RemoveAll(dataDir) + } else { + tempDir = filepath.Join(dataDir, "temp") + + if err := os.MkdirAll(tempDir, 0755); err != nil { + return err + } + } + + if rcCli != nil { + if src != nil && src.LType == sync.RemoteFs { + srcSession, err = rcCli.NewSession(cliCtx.Context, filepath.Join(tempDir, "src"), src.Src+":"+src.Root) + + if err != nil { + return err + } + } + } + + if src != nil && srcSession == nil { + return 
fmt.Errorf("no src session established") + } + + logger.Debug("Starting torrents " + command) + + switch command { + case "hashes": + return torrentHashes(cliCtx.Context, srcSession, firstBlock, lastBlock) + case "update": + startTime := time.Now() + + logger.Info(fmt.Sprintf("Starting update: %s", src.String()), "first", firstBlock, "last", lastBlock, "dir", tempDir) + + err := updateTorrents(cliCtx.Context, srcSession, firstBlock, lastBlock, logger) + + if err == nil { + logger.Info(fmt.Sprintf("Finished update: %s", src.String()), "elapsed", time.Since(startTime)) + } else { + logger.Info(fmt.Sprintf("Aborted update: %s", src.String()), "err", err) + } + + return err + + case "verify": + startTime := time.Now() + + logger.Info(fmt.Sprintf("Starting verify: %s", src.String()), "first", firstBlock, "last", lastBlock, "dir", tempDir) + + err := verifyTorrents(cliCtx.Context, srcSession, firstBlock, lastBlock, logger) + + if err == nil { + logger.Info(fmt.Sprintf("Verified: %s", src.String()), "elapsed", time.Since(startTime)) + } else { + logger.Info(fmt.Sprintf("Verification failed: %s", src.String()), "err", err) + } + + return err + } + + return listTorrents(cliCtx.Context, srcSession, os.Stdout, firstBlock, lastBlock) +} + +func listTorrents(ctx context.Context, srcSession *downloader.RCloneSession, out *os.File, from uint64, to uint64) error { + entries, err := manifest.DownloadManifest(ctx, srcSession) + + if err != nil { + entries, err = srcSession.ReadRemoteDir(ctx, true) + } + + if err != nil { + return err + } + + for _, fi := range entries { + if filepath.Ext(fi.Name()) == ".torrent" { + if from > 0 || to > 0 { + info, _ := snaptype.ParseFileName("", strings.TrimSuffix(fi.Name(), ".torrent")) + + if from > 0 && info.From < from { + continue + } + + if to > 0 && info.From > to { + continue + } + } + + fmt.Fprintln(out, fi.Name()) + } + } + + return nil +} + +func torrentHashes(ctx context.Context, srcSession *downloader.RCloneSession, from uint64, to uint64) error { + entries, err := manifest.DownloadManifest(ctx, srcSession) + + if err != nil { + return err + } + + type hashInfo struct { + name, hash string + } + + var hashes []hashInfo + var hashesMutex gosync.Mutex + + g, gctx := errgroup.WithContext(ctx) + g.SetLimit(16) + + for _, fi := range entries { + if filepath.Ext(fi.Name()) == ".torrent" { + if from > 0 || to > 0 { + info, _ := snaptype.ParseFileName("", strings.TrimSuffix(fi.Name(), ".torrent")) + + if from > 0 && info.From < from { + continue + } + + if to > 0 && info.From > to { + continue + } + } + + file := fi.Name() + + g.Go(func() error { + var mi *metainfo.MetaInfo + + errs := 0 + + for { + reader, err := srcSession.Cat(gctx, file) + + if err != nil { + return fmt.Errorf("can't read remote torrent: %s: %w", file, err) + } + + mi, err = metainfo.Load(reader) + + if err != nil { + errs++ + + if errs == 4 { + return fmt.Errorf("can't parse remote torrent: %s: %w", file, err) + } + + continue + } + + break + } + + info, err := mi.UnmarshalInfo() + + if err != nil { + return fmt.Errorf("can't unmarshal torrent info: %s: %w", file, err) + } + + hashesMutex.Lock() + defer hashesMutex.Unlock() + hashes = append(hashes, hashInfo{info.Name, mi.HashInfoBytes().String()}) + + return nil + }) + } + } + + if err := g.Wait(); err != nil { + return err + } + + slices.SortFunc(hashes, func(a, b hashInfo) int { + return strings.Compare(a.name, b.name) + }) + + for _, hi := range hashes { + fmt.Printf("'%s' = '%s'\n", hi.name, hi.hash) + } + + return nil +} + +func 
updateTorrents(ctx context.Context, srcSession *downloader.RCloneSession, from uint64, to uint64, logger log.Logger) error { + entries, err := manifest.DownloadManifest(ctx, srcSession) + + if err != nil { + return err + } + + g, gctx := errgroup.WithContext(ctx) + g.SetLimit(16) + + torrentFiles := downloader.NewAtomicTorrentFiles(srcSession.LocalFsRoot()) + + for _, fi := range entries { + if filepath.Ext(fi.Name()) == ".torrent" { + file := strings.TrimSuffix(fi.Name(), ".torrent") + + g.Go(func() error { + if from > 0 || to > 0 { + info, _ := snaptype.ParseFileName("", file) + + if from > 0 && info.From < from { + return nil + } + + if to > 0 && info.From > to { + return nil + } + } + + logger.Info(fmt.Sprintf("Updating %s", file+".torrent")) + + err := srcSession.Download(gctx, file) + + if err != nil { + return err + } + + defer os.Remove(filepath.Join(srcSession.LocalFsRoot(), file)) + + err = downloader.BuildTorrentIfNeed(gctx, file, srcSession.LocalFsRoot(), torrentFiles) + + if err != nil { + return err + } + + defer os.Remove(filepath.Join(srcSession.LocalFsRoot(), file+".torrent")) + + return srcSession.Upload(gctx, file+".torrent") + }) + } + } + + return g.Wait() +} + +func verifyTorrents(ctx context.Context, srcSession *downloader.RCloneSession, from uint64, to uint64, logger log.Logger) error { + entries, err := manifest.DownloadManifest(ctx, srcSession) + + if err != nil { + return err + } + + g, gctx := errgroup.WithContext(ctx) + g.SetLimit(16) + + torrentFiles := downloader.NewAtomicTorrentFiles(srcSession.LocalFsRoot()) + + for _, fi := range entries { + if filepath.Ext(fi.Name()) == ".torrent" { + file := strings.TrimSuffix(fi.Name(), ".torrent") + + g.Go(func() error { + if from > 0 || to > 0 { + info, _ := snaptype.ParseFileName("", file) + + if from > 0 && info.From < from { + return nil + } + + if to > 0 && info.From > to { + return nil + } + } + + logger.Info(fmt.Sprintf("Validating %s", file+".torrent")) + + var mi *metainfo.MetaInfo + + errs := 0 + + for { + reader, err := srcSession.Cat(gctx, file+".torrent") + + if err != nil { + return fmt.Errorf("can't read remote torrent: %s: %w", file+".torrent", err) + } + + mi, err = metainfo.Load(reader) + + if err != nil { + errs++ + + if errs == 4 { + return fmt.Errorf("can't parse remote torrent: %s: %w", file+".torrent", err) + } + + continue + } + + break + } + + info, err := mi.UnmarshalInfo() + + if err != nil { + return fmt.Errorf("can't unmarshal torrent info: %s: %w", file+".torrent", err) + } + + if info.Name != file { + return fmt.Errorf("torrent name does not match file: %s", file) + } + + err = srcSession.Download(gctx, file) + + if err != nil { + return err + } + + defer os.Remove(filepath.Join(srcSession.LocalFsRoot(), file)) + + err = downloader.BuildTorrentIfNeed(gctx, file, srcSession.LocalFsRoot(), torrentFiles) + + if err != nil { + return err + } + + torrentPath := filepath.Join(srcSession.LocalFsRoot(), file+".torrent") + + defer os.Remove(torrentPath) + + lmi, err := metainfo.LoadFromFile(torrentPath) + + if err != nil { + return fmt.Errorf("can't load local torrent from: %s: %w", torrentPath, err) + } + + if lmi.HashInfoBytes() != mi.HashInfoBytes() { + return fmt.Errorf("computed local hash does not match torrent: %s: expected: %s, got: %s", file+".torrent", lmi.HashInfoBytes(), mi.HashInfoBytes()) + } + + localInfo, err := lmi.UnmarshalInfo() + + if err != nil { + return fmt.Errorf("can't unmarshal local torrent info: %s: %w", torrentPath, err) + } + + if localInfo.Name != info.Name { + 
return fmt.Errorf("computed local name does not match torrent: %s: expected: %s, got: %s", file+".torrent", localInfo.Name, info.Name) + } + + return nil + }) + } + } + + return g.Wait() +} diff --git a/cmd/snapshots/verify/verify.go b/cmd/snapshots/verify/verify.go new file mode 100644 index 00000000000..bb0fbc83b70 --- /dev/null +++ b/cmd/snapshots/verify/verify.go @@ -0,0 +1,249 @@ +package verify + +import ( + "fmt" + "os" + "path/filepath" + "strconv" + + "github.com/ledgerwatch/erigon-lib/downloader" + "github.com/ledgerwatch/erigon-lib/downloader/snaptype" + "github.com/ledgerwatch/erigon/cmd/snapshots/flags" + "github.com/ledgerwatch/erigon/cmd/snapshots/sync" + "github.com/ledgerwatch/erigon/cmd/utils" + "github.com/urfave/cli/v2" +) + +var ( + SrcFlag = cli.StringFlag{ + Name: "src", + Usage: `Source location for verification files (torrent,hash,manifest)`, + Required: false, + } + DstFlag = cli.StringFlag{ + Name: "dst", + Usage: `Destination location containiong copies to be verified`, + Required: true, + } + ChainFlag = cli.StringFlag{ + Name: "chain", + Usage: `The chain being validated, required if not included src or dst naming`, + Required: false, + } + TorrentsFlag = cli.BoolFlag{ + Name: "torrents", + Usage: `Verify against torrent files`, + Required: false, + } + + HashesFlag = cli.BoolFlag{ + Name: "hashes", + Usage: `Verify against hash .toml contents`, + Required: false, + } + + ManifestFlag = cli.BoolFlag{ + Name: "manifest", + Usage: `Verify against manifest .txt contents`, + Required: false, + } +) + +var Command = cli.Command{ + Action: verify, + Name: "verify", + Usage: "verify snapshot segments against hashes and torrents", + ArgsUsage: " ", + Flags: []cli.Flag{ + &SrcFlag, + &DstFlag, + &ChainFlag, + &flags.SegTypes, + &TorrentsFlag, + &HashesFlag, + &ManifestFlag, + &utils.WebSeedsFlag, + &utils.NATFlag, + &utils.DisableIPV6, + &utils.DisableIPV4, + &utils.TorrentDownloadRateFlag, + &utils.TorrentUploadRateFlag, + &utils.TorrentVerbosityFlag, + &utils.TorrentPortFlag, + &utils.TorrentMaxPeersFlag, + &utils.TorrentConnsPerFileFlag, + }, + Description: ``, +} + +func verify(cliCtx *cli.Context) error { + logger := sync.Logger(cliCtx.Context) + + logger.Info("Starting verify") + + var src, dst *sync.Locator + var err error + + var rcCli *downloader.RCloneClient + var torrentCli *sync.TorrentClient + + if src, err = sync.ParseLocator(cliCtx.String(SrcFlag.Name)); err != nil { + return err + } + + if dst, err = sync.ParseLocator(cliCtx.String(DstFlag.Name)); err != nil { + return err + } + + chain := cliCtx.String(ChainFlag.Name) + + switch dst.LType { + case sync.TorrentFs: + torrentCli, err = sync.NewTorrentClient(cliCtx, dst.Chain) + if err != nil { + return fmt.Errorf("can't create torrent: %w", err) + } + + case sync.RemoteFs: + if rcCli == nil { + rcCli, err = downloader.NewRCloneClient(logger) + + if err != nil { + return err + } + } + + if err = sync.CheckRemote(rcCli, src.Src); err != nil { + return err + } + + if len(chain) == 0 { + chain = dst.Chain + } + } + + switch src.LType { + case sync.TorrentFs: + if torrentCli == nil { + torrentCli, err = sync.NewTorrentClient(cliCtx, dst.Chain) + if err != nil { + return fmt.Errorf("can't create torrent: %w", err) + } + } + + case sync.RemoteFs: + if rcCli == nil { + rcCli, err = downloader.NewRCloneClient(logger) + + if err != nil { + return err + } + } + + if err = sync.CheckRemote(rcCli, src.Src); err != nil { + return err + } + + if len(chain) == 0 { + chain = src.Chain + } + } + + typeValues := 
cliCtx.StringSlice(flags.SegTypes.Name) + snapTypes := make([]snaptype.Type, 0, len(typeValues)) + + for _, val := range typeValues { + segType, ok := snaptype.ParseFileType(val) + + if !ok { + return fmt.Errorf("unknown file type: %s", val) + } + + snapTypes = append(snapTypes, segType) + } + + torrents := cliCtx.Bool(TorrentsFlag.Name) + hashes := cliCtx.Bool(HashesFlag.Name) + manifest := cliCtx.Bool(ManifestFlag.Name) + + var firstBlock, lastBlock uint64 + + if cliCtx.Args().Len() > 0 { + if firstBlock, err = strconv.ParseUint(cliCtx.Args().Get(0), 10, 64); err != nil { + return err + } + } + + if cliCtx.Args().Len() > 1 { + if lastBlock, err = strconv.ParseUint(cliCtx.Args().Get(1), 10, 64); err != nil { + return err + } + } + + var srcSession sync.DownloadSession + var dstSession sync.DownloadSession + + dataDir := cliCtx.String(utils.DataDirFlag.Name) + var tempDir string + + if len(dataDir) == 0 { + dataDir, err := os.MkdirTemp("", "snapshot-verify-") + if err != nil { + return err + } + tempDir = dataDir + defer os.RemoveAll(dataDir) + } else { + tempDir = filepath.Join(dataDir, "temp") + + if err := os.MkdirAll(tempDir, 0755); err != nil { + return err + } + } + + if rcCli != nil { + if src != nil && src.LType == sync.RemoteFs { + srcSession, err = rcCli.NewSession(cliCtx.Context, filepath.Join(tempDir, "src"), src.Src+":"+src.Root) + + if err != nil { + return err + } + } + + if dst.LType == sync.RemoteFs { + dstSession, err = rcCli.NewSession(cliCtx.Context, filepath.Join(tempDir, "dst"), dst.Src+":"+dst.Root) + + if err != nil { + return err + } + } + } + + if torrentCli != nil { + if src != nil && src.LType == sync.TorrentFs { + srcSession = sync.NewTorrentSession(torrentCli, chain) + } + + if dst.LType == sync.TorrentFs { + dstSession = sync.NewTorrentSession(torrentCli, chain) + } + } + + if src != nil && srcSession == nil { + return fmt.Errorf("no src session established") + } + + if dstSession == nil { + return fmt.Errorf("no dst session established") + } + + if srcSession == nil { + srcSession = dstSession + } + + return verfifySnapshots(srcSession, dstSession, firstBlock, lastBlock, snapTypes, torrents, hashes, manifest) +} + +func verfifySnapshots(srcSession sync.DownloadSession, rcSession sync.DownloadSession, from uint64, to uint64, snapTypes []snaptype.Type, torrents, hashes, manifest bool) error { + return fmt.Errorf("TODO") +} diff --git a/cmd/state/commands/check_change_sets.go b/cmd/state/commands/check_change_sets.go index 85308fdcd3a..19eceebb5ca 100644 --- a/cmd/state/commands/check_change_sets.go +++ b/cmd/state/commands/check_change_sets.go @@ -46,6 +46,7 @@ func init() { withBlock(checkChangeSetsCmd) withDataDir(checkChangeSetsCmd) withSnapshotBlocks(checkChangeSetsCmd) + withSnapshotVersion(checkChangeSetsCmd) checkChangeSetsCmd.Flags().StringVar(&historyfile, "historyfile", "", "path to the file where the changesets and history are expected to be. 
If omitted, the same as /erion/chaindata") checkChangeSetsCmd.Flags().BoolVar(&nocheck, "nocheck", false, "set to turn off the changeset checking and only execute transaction (for performance testing)") rootCmd.AddCommand(checkChangeSetsCmd) @@ -56,13 +57,13 @@ var checkChangeSetsCmd = &cobra.Command{ Short: "Re-executes historical transactions in read-only mode and checks that their outputs match the database ChangeSets", RunE: func(cmd *cobra.Command, args []string) error { logger := debug.SetupCobra(cmd, "check_change_sets") - return CheckChangeSets(cmd.Context(), genesis, block, chaindata, historyfile, nocheck, logger) + return CheckChangeSets(cmd.Context(), genesis, snapshotVersion, block, chaindata, historyfile, nocheck, logger) }, } // CheckChangeSets re-executes historical transactions in read-only mode // and checks that their outputs match the database ChangeSets. -func CheckChangeSets(ctx context.Context, genesis *types.Genesis, blockNum uint64, chaindata string, historyfile string, nocheck bool, logger log.Logger) error { +func CheckChangeSets(ctx context.Context, genesis *types.Genesis, snapshotVersion uint8, blockNum uint64, chaindata string, historyfile string, nocheck bool, logger log.Logger) error { if len(historyfile) == 0 { historyfile = chaindata } @@ -81,7 +82,7 @@ func CheckChangeSets(ctx context.Context, genesis *types.Genesis, blockNum uint6 if err != nil { return err } - allSnapshots := freezeblocks.NewRoSnapshots(ethconfig.NewSnapCfg(true, false, true), path.Join(datadirCli, "snapshots"), logger) + allSnapshots := freezeblocks.NewRoSnapshots(ethconfig.NewSnapCfg(true, false, true), path.Join(datadirCli, "snapshots"), snapshotVersion, logger) defer allSnapshots.Close() if err := allSnapshots.ReopenFolder(); err != nil { return fmt.Errorf("reopen snapshot segments: %w", err) diff --git a/cmd/state/commands/global_flags_vars.go b/cmd/state/commands/global_flags_vars.go index dd81e19aee6..a45471410b7 100644 --- a/cmd/state/commands/global_flags_vars.go +++ b/cmd/state/commands/global_flags_vars.go @@ -19,6 +19,7 @@ var ( snapshotsCli bool chain string logdir string + snapshotVersion uint8 ) func must(err error) { @@ -39,6 +40,10 @@ func withDataDir(cmd *cobra.Command) { must(cmd.MarkFlagDirname("chaindata")) } +func withSnapshotVersion(cmd *cobra.Command) { + cmd.Flags().Uint8Var(&snapshotVersion, "stapshots.version", 1, "specifies the snapshot file version") +} + func withStatsfile(cmd *cobra.Command) { cmd.Flags().StringVar(&statsfile, "statsfile", "stateless.csv", "path where to write the stats file") must(cmd.MarkFlagFilename("statsfile", "csv")) diff --git a/cmd/state/commands/opcode_tracer.go b/cmd/state/commands/opcode_tracer.go index c9cebb45e03..72901c7b1fa 100644 --- a/cmd/state/commands/opcode_tracer.go +++ b/cmd/state/commands/opcode_tracer.go @@ -44,6 +44,7 @@ var ( func init() { withBlock(opcodeTracerCmd) withDataDir(opcodeTracerCmd) + withSnapshotVersion(opcodeTracerCmd) opcodeTracerCmd.Flags().Uint64Var(&numBlocks, "numBlocks", 1, "number of blocks to run the operation on") opcodeTracerCmd.Flags().BoolVar(&saveOpcodes, "saveOpcodes", false, "set to save the opcodes") opcodeTracerCmd.Flags().BoolVar(&saveBBlocks, "saveBBlocks", false, "set to save the basic blocks") @@ -56,7 +57,7 @@ var opcodeTracerCmd = &cobra.Command{ Short: "Re-executes historical transactions in read-only mode and traces them at the opcode level", RunE: func(cmd *cobra.Command, args []string) error { logger := log.New("opcode-tracer", genesis.Config.ChainID) - return 
OpcodeTracer(genesis, block, chaindata, numBlocks, saveOpcodes, saveBBlocks, logger) + return OpcodeTracer(genesis, snapshotVersion, block, chaindata, numBlocks, saveOpcodes, saveBBlocks, logger) }, } @@ -395,7 +396,7 @@ type segPrefix struct { // OpcodeTracer re-executes historical transactions in read-only mode // and traces them at the opcode level -func OpcodeTracer(genesis *types.Genesis, blockNum uint64, chaindata string, numBlocks uint64, +func OpcodeTracer(genesis *types.Genesis, snapshotVersion uint8, blockNum uint64, chaindata string, numBlocks uint64, saveOpcodes bool, saveBblocks bool, logger log.Logger) error { blockNumOrig := blockNum @@ -428,7 +429,7 @@ func OpcodeTracer(genesis *types.Genesis, blockNum uint64, chaindata string, num } return nil }) - blockReader := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", log.New()), nil /* BorSnapshots */) + blockReader := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", snapshotVersion, log.New()), nil /* BorSnapshots */) chainConfig := genesis.Config vmConfig := vm.Config{Tracer: ot, Debug: true} diff --git a/cmd/state/commands/state_root.go b/cmd/state/commands/state_root.go index 8945289cff3..be55cce6621 100644 --- a/cmd/state/commands/state_root.go +++ b/cmd/state/commands/state_root.go @@ -35,6 +35,7 @@ import ( func init() { withBlock(stateRootCmd) withDataDir(stateRootCmd) + withSnapshotVersion(stateRootCmd) rootCmd.AddCommand(stateRootCmd) } @@ -43,11 +44,11 @@ var stateRootCmd = &cobra.Command{ Short: "Exerimental command to re-execute blocks from beginning and compute state root", RunE: func(cmd *cobra.Command, args []string) error { logger := debug.SetupCobra(cmd, "stateroot") - return StateRoot(cmd.Context(), genesis, block, datadirCli, logger) + return StateRoot(cmd.Context(), genesis, snapshotVersion, block, datadirCli, logger) }, } -func blocksIO(db kv.RoDB) (services.FullBlockReader, *blockio.BlockWriter) { +func blocksIO(db kv.RoDB, snapshotVersion uint8) (services.FullBlockReader, *blockio.BlockWriter) { var histV3 bool if err := db.View(context.Background(), func(tx kv.Tx) error { histV3, _ = kvcfg.HistoryV3.Enabled(tx) @@ -55,12 +56,12 @@ func blocksIO(db kv.RoDB) (services.FullBlockReader, *blockio.BlockWriter) { }); err != nil { panic(err) } - br := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", log.New()), nil /* BorSnapshots */) + br := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", snapshotVersion, log.New()), nil /* BorSnapshots */) bw := blockio.NewBlockWriter(histV3) return br, bw } -func StateRoot(ctx context.Context, genesis *types.Genesis, blockNum uint64, datadir string, logger log.Logger) error { +func StateRoot(ctx context.Context, genesis *types.Genesis, snapshotVersion uint8, blockNum uint64, datadir string, logger log.Logger) error { sigs := make(chan os.Signal, 1) interruptCh := make(chan bool, 1) signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) @@ -93,7 +94,7 @@ func StateRoot(ctx context.Context, genesis *types.Genesis, blockNum uint64, dat return err2 } defer db.Close() - blockReader, _ := blocksIO(db) + blockReader, _ := blocksIO(db, snapshotVersion) chainConfig := genesis.Config vmConfig := vm.Config{} diff --git a/cmd/state/commands/verify_txlookup.go b/cmd/state/commands/verify_txlookup.go index 8dd27671015..3b5c4707c22 100644 --- a/cmd/state/commands/verify_txlookup.go 
+++ b/cmd/state/commands/verify_txlookup.go @@ -8,6 +8,7 @@ import ( func init() { withDataDir(verifyTxLookupCmd) + withSnapshotVersion(verifyTxLookupCmd) rootCmd.AddCommand(verifyTxLookupCmd) } @@ -16,6 +17,6 @@ var verifyTxLookupCmd = &cobra.Command{ Short: "Generate tx lookup index", RunE: func(cmd *cobra.Command, args []string) error { logger := debug.SetupCobra(cmd, "verify_txlookup") - return verify.ValidateTxLookups(chaindata, logger) + return verify.ValidateTxLookups(chaindata, snapshotVersion, logger) }, } diff --git a/cmd/state/verify/verify_txlookup.go b/cmd/state/verify/verify_txlookup.go index 3a7351d11b8..625ef1fc717 100644 --- a/cmd/state/verify/verify_txlookup.go +++ b/cmd/state/verify/verify_txlookup.go @@ -20,7 +20,7 @@ import ( "github.com/ledgerwatch/log/v3" ) -func blocksIO(db kv.RoDB) (services.FullBlockReader, *blockio.BlockWriter) { +func blocksIO(db kv.RoDB, snapshotVersion uint8) (services.FullBlockReader, *blockio.BlockWriter) { var histV3 bool if err := db.View(context.Background(), func(tx kv.Tx) error { histV3, _ = kvcfg.HistoryV3.Enabled(tx) @@ -28,14 +28,14 @@ func blocksIO(db kv.RoDB) (services.FullBlockReader, *blockio.BlockWriter) { }); err != nil { panic(err) } - br := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", log.New()), nil /* BorSnapshots */) + br := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", snapshotVersion, log.New()), nil /* BorSnapshots */) bw := blockio.NewBlockWriter(histV3) return br, bw } -func ValidateTxLookups(chaindata string, logger log.Logger) error { +func ValidateTxLookups(chaindata string, snapshotVersion uint8, logger log.Logger) error { db := mdbx.MustOpen(chaindata) - br, _ := blocksIO(db) + br, _ := blocksIO(db, snapshotVersion) tx, err := db.BeginRo(context.Background()) if err != nil { return err diff --git a/cmd/tooling/cli.go b/cmd/tooling/cli.go index 1fc0ae9a558..a30a30a4ad8 100644 --- a/cmd/tooling/cli.go +++ b/cmd/tooling/cli.go @@ -12,6 +12,7 @@ import ( "github.com/ledgerwatch/erigon/turbo/snapshotsync/freezeblocks" "golang.org/x/net/context" + "github.com/ledgerwatch/erigon-lib/chain/snapcfg" "github.com/ledgerwatch/erigon-lib/common/datadir" "github.com/ledgerwatch/erigon-lib/downloader/snaptype" "github.com/ledgerwatch/erigon/cl/persistence" @@ -78,7 +79,10 @@ func (c *BucketCaplinAutomation) Run(ctx *Context) error { tickerTriggerer := time.NewTicker(c.UploadPeriod) defer tickerTriggerer.Stop() // do the checking at first run - if err := checkSnapshots(ctx, beaconConfig, dirs); err != nil { + + snapshotVersion := snapcfg.KnownCfg(c.Chain, 0).Version + + if err := checkSnapshots(ctx, beaconConfig, dirs, snapshotVersion); err != nil { return err } log.Info("Uploading snapshots to R2 bucket") @@ -93,7 +97,9 @@ func (c *BucketCaplinAutomation) Run(ctx *Context) error { select { case <-tickerTriggerer.C: log.Info("Checking snapshots") - if err := checkSnapshots(ctx, beaconConfig, dirs); err != nil { + snapshotVersion := snapcfg.KnownCfg(c.Chain, 0).Version + + if err := checkSnapshots(ctx, beaconConfig, dirs, snapshotVersion); err != nil { return err } log.Info("Finishing snapshots") @@ -111,7 +117,7 @@ func (c *BucketCaplinAutomation) Run(ctx *Context) error { } } -func checkSnapshots(ctx context.Context, beaconConfig *clparams.BeaconChainConfig, dirs datadir.Dirs) error { +func checkSnapshots(ctx context.Context, beaconConfig *clparams.BeaconChainConfig, dirs datadir.Dirs, snapshotVersion uint8) error { 
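	// Illustrative note, not part of this diff: checkSnapshots now receives the
	// snapshot file version explicitly. The body below opens the caplin block
	// store and database, rounds the target slot down to a full
	// snaptype.Erigon2MergeLimit range, and reopens the caplin snapshot folder
	// with the supplied version, so the check runs against v<version>-* files
	// rather than assuming v1. The caller in Run() above resolves the version as:
	//
	//	snapshotVersion := snapcfg.KnownCfg(c.Chain, 0).Version
	//	err := checkSnapshots(ctx, beaconConfig, dirs, snapshotVersion)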
rawDB, _ := persistence.AferoRawBeaconBlockChainFromOsPath(beaconConfig, dirs.CaplinHistory) _, db, err := caplin1.OpenCaplinDatabase(ctx, db_config.DatabaseConfiguration{PruneDepth: math.MaxUint64}, beaconConfig, rawDB, dirs.CaplinIndexing, nil, false) if err != nil { @@ -132,7 +138,7 @@ func checkSnapshots(ctx context.Context, beaconConfig *clparams.BeaconChainConfi to = (to / snaptype.Erigon2MergeLimit) * snaptype.Erigon2MergeLimit - csn := freezeblocks.NewCaplinSnapshots(ethconfig.BlocksFreezing{}, beaconConfig, dirs.Snap, log.Root()) + csn := freezeblocks.NewCaplinSnapshots(ethconfig.BlocksFreezing{}, beaconConfig, dirs.Snap, snapshotVersion, log.Root()) if err := csn.ReopenFolder(); err != nil { return err } diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index 4d30daa13ce..86ed9247b3e 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -1335,7 +1335,7 @@ func setGPOCobra(f *pflag.FlagSet, cfg *gaspricecfg.Config) { func setTxPool(ctx *cli.Context, fullCfg *ethconfig.Config) { cfg := &fullCfg.DeprecatedTxPool - if ctx.IsSet(TxPoolDisableFlag.Name) { + if ctx.IsSet(TxPoolDisableFlag.Name) || TxPoolDisableFlag.Value { cfg.Disable = true } if ctx.IsSet(TxPoolLocalsFlag.Name) { diff --git a/erigon-lib/chain/snapcfg/util.go b/erigon-lib/chain/snapcfg/util.go index 8f5ecf2f3ae..db1e42d3276 100644 --- a/erigon-lib/chain/snapcfg/util.go +++ b/erigon-lib/chain/snapcfg/util.go @@ -49,23 +49,54 @@ func doSort(in preverified) Preverified { } var ( - MainnetChainSnapshotCfg = newCfg(Mainnet) - // HoleskyChainSnapshotCfg = newCfg(Holesky, HoleskyHistory) - SepoliaChainSnapshotCfg = newCfg(Sepolia) - GoerliChainSnapshotCfg = newCfg(Goerli) - MumbaiChainSnapshotCfg = newCfg(Mumbai) - AmoyChainSnapshotCfg = newCfg(Amoy) - BorMainnetChainSnapshotCfg = newCfg(BorMainnet) - GnosisChainSnapshotCfg = newCfg(Gnosis) - ChiadoChainSnapshotCfg = newCfg(Chiado) + isDefaultVersion bool = true + snapshotVersion uint8 = 1 ) -func newCfg(preverified Preverified) *Cfg { - return &Cfg{ExpectBlocks: maxBlockNum(preverified), Preverified: preverified} +func SnapshotVersion(version uint8) { + snapshotVersion = version + isDefaultVersion = false } -func maxBlockNum(preverified Preverified) uint64 { +func newCfg(preverified Preverified, version uint8) *Cfg { + + if version == 0 { + version = snapshotVersion + + var pv Preverified + + for _, p := range preverified { + if v, _, ok := strings.Cut(p.Name, "-"); ok && strings.HasPrefix(v, "v") { + if v, err := strconv.ParseUint(v[1:], 10, 8); err == nil && uint64(version) == v { + pv = append(pv, p) + } + } + } + + // don't do this check if the SnapshotVersion has been explicitly set + if len(pv) == 0 && isDefaultVersion { + version = maxVersion(preverified) + + for _, p := range preverified { + if v, _, ok := strings.Cut(p.Name, "-"); ok && strings.HasPrefix(v, "v") { + if v, err := strconv.ParseUint(v[1:], 10, 8); err == nil && uint64(version) == v { + pv = append(pv, p) + } + } + } + } + + preverified = pv + } + + maxBlockNum, version := cfgInfo(preverified, version) + return &Cfg{ExpectBlocks: maxBlockNum, Preverified: preverified, Version: version} +} + +func cfgInfo(preverified Preverified, defaultVersion uint8) (uint64, uint8) { max := uint64(0) + version := defaultVersion + for _, p := range preverified { _, fileName := filepath.Split(p.Name) ext := filepath.Ext(fileName) @@ -84,37 +115,61 @@ func maxBlockNum(preverified Preverified) uint64 { if max < to { max = to } + + if vp := parts[0]; strings.HasPrefix(vp, "v") { + if v, err := 
strconv.ParseUint(vp[1:], 10, 8); err == nil { + version = uint8(v) + } + } } if max == 0 { // to prevent underflow - return 0 + return 0, version } - return max*1_000 - 1 + return max*1_000 - 1, version } type Cfg struct { ExpectBlocks uint64 + Version uint8 Preverified Preverified } -var KnownCfgs = map[string]*Cfg{ - networkname.MainnetChainName: MainnetChainSnapshotCfg, +var knownPreverified = map[string]Preverified{ + networkname.MainnetChainName: Mainnet, // networkname.HoleskyChainName: HoleskyChainSnapshotCfg, - networkname.SepoliaChainName: SepoliaChainSnapshotCfg, - networkname.GoerliChainName: GoerliChainSnapshotCfg, - networkname.MumbaiChainName: MumbaiChainSnapshotCfg, - networkname.AmoyChainName: AmoyChainSnapshotCfg, - networkname.BorMainnetChainName: BorMainnetChainSnapshotCfg, - networkname.GnosisChainName: GnosisChainSnapshotCfg, - networkname.ChiadoChainName: ChiadoChainSnapshotCfg, + networkname.SepoliaChainName: Sepolia, + networkname.GoerliChainName: Goerli, + networkname.MumbaiChainName: Mumbai, + networkname.AmoyChainName: Amoy, + networkname.BorMainnetChainName: BorMainnet, + networkname.GnosisChainName: Gnosis, + networkname.ChiadoChainName: Chiado, } // KnownCfg return list of preverified hashes for given network, but apply whiteList filter if it's not empty -func KnownCfg(networkName string) *Cfg { - c, ok := KnownCfgs[networkName] +func KnownCfg(networkName string, version uint8) *Cfg { + c, ok := knownPreverified[networkName] if !ok { - return newCfg(Preverified{}) + return newCfg(Preverified{}, version) + } + return newCfg(c, version) +} + +func maxVersion(pv Preverified) uint8 { + var max uint8 + + for _, p := range pv { + if v, _, ok := strings.Cut(p.Name, "-"); ok && strings.HasPrefix(v, "v") { + if v, err := strconv.ParseUint(v[1:], 10, 8); err == nil { + version := uint8(v) + if max < version { + max = version + } + } + } } - return newCfg(c.Preverified) + + return max } var KnownWebseeds = map[string][]string{ diff --git a/erigon-lib/common/dbg/experiments.go b/erigon-lib/common/dbg/experiments.go index 1c6ac65021c..e9df7ace44e 100644 --- a/erigon-lib/common/dbg/experiments.go +++ b/erigon-lib/common/dbg/experiments.go @@ -278,8 +278,24 @@ func StopAfterReconst() bool { v, _ := os.LookupEnv("STOP_AFTER_RECONSTITUTE") if v == "true" { stopAfterReconst = true - log.Info("[Experiment]", "STOP_AFTER_RECONSTITUTE", writeMap) + log.Info("[Experiment]", "STOP_AFTER_RECONSTITUTE", stopAfterReconst) } }) return stopAfterReconst } + +var ( + snapshotVersion uint8 + snapshotVersionOnce sync.Once +) + +func SnapshotVersion() uint8 { + snapshotVersionOnce.Do(func() { + v, _ := os.LookupEnv("SNAPSHOT_VERSION") + if i, _ := strconv.ParseUint(v, 10, 8); i > 0 { + snapshotVersion = uint8(i) + log.Info("[Experiment]", "SNAPSHOT_VERSION", snapshotVersion) + } + }) + return snapshotVersion +} diff --git a/erigon-lib/common/dir/rw_dir.go b/erigon-lib/common/dir/rw_dir.go index 0bbf76d8f5f..2d0e7066493 100644 --- a/erigon-lib/common/dir/rw_dir.go +++ b/erigon-lib/common/dir/rw_dir.go @@ -49,6 +49,17 @@ func FileExist(path string) bool { return true } +func FileNonZero(path string) bool { + fi, err := os.Stat(path) + if err != nil && os.IsNotExist(err) { + return false + } + if !fi.Mode().IsRegular() { + return false + } + return fi.Size() > 0 +} + // nolint func WriteFileWithFsync(name string, data []byte, perm os.FileMode) error { f, err := os.OpenFile(name, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm) diff --git a/erigon-lib/compress/decompress.go 
b/erigon-lib/compress/decompress.go index 52e6bad505c..7f058628691 100644 --- a/erigon-lib/compress/decompress.go +++ b/erigon-lib/compress/decompress.go @@ -347,6 +347,10 @@ func (d *Decompressor) ModTime() time.Time { return d.modTime } +func (d *Decompressor) IsOpen() bool { + return d != nil && d.f != nil +} + func (d *Decompressor) Close() { if d.f != nil { if err := mmap.Munmap(d.mmapHandle1, d.mmapHandle2); err != nil { diff --git a/erigon-lib/downloader/downloader.go b/erigon-lib/downloader/downloader.go index ec1c0f03a97..6f7d87a8d60 100644 --- a/erigon-lib/downloader/downloader.go +++ b/erigon-lib/downloader/downloader.go @@ -119,11 +119,13 @@ func New(ctx context.Context, cfg *downloadercfg.Cfg, dirs datadir.Dirs, logger d.webseeds.torrentFiles = d.torrentFiles d.ctx, d.stopMainLoop = context.WithCancel(ctx) - if err := d.BuildTorrentFilesIfNeed(d.ctx); err != nil { - return nil, err - } - if err := d.addTorrentFilesFromDisk(false); err != nil { - return nil, err + if cfg.AddTorrentsFromDisk { + if err := d.BuildTorrentFilesIfNeed(d.ctx); err != nil { + return nil, err + } + if err := d.addTorrentFilesFromDisk(false); err != nil { + return nil, err + } } // CornerCase: no peers -> no anoncments to trackers -> no magnetlink resolution (but magnetlink has filename) diff --git a/erigon-lib/downloader/downloadercfg/downloadercfg.go b/erigon-lib/downloader/downloadercfg/downloadercfg.go index dae659c6207..6a466c2fea5 100644 --- a/erigon-lib/downloader/downloadercfg/downloadercfg.go +++ b/erigon-lib/downloader/downloadercfg/downloadercfg.go @@ -17,9 +17,9 @@ package downloadercfg import ( - "io/ioutil" "net" "net/url" + "os" "path/filepath" "runtime" "strings" @@ -54,6 +54,7 @@ type Cfg struct { WebSeedS3Tokens []string ExpectedTorrentFilesHashes snapcfg.Preverified DownloadTorrentFilesFromWebseed bool + AddTorrentsFromDisk bool ChainName string Dirs datadir.Dirs @@ -188,17 +189,17 @@ func New(dirs datadir.Dirs, version string, verbosity lg.Level, downloadRate, up webseedFileProviders = append(webseedFileProviders, localCfgFile) } //TODO: if don't pass "downloaded files list here" (which we store in db) - synced erigon will download new .torrent files. And erigon can't work with "unfinished" files. 
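	// Illustrative note, not part of this diff: KnownCfg now takes a version
	// argument. Passing 0, as on the next line, filters the preverified list to
	// names matching the package default version (1 unless overridden via
	// snapcfg.SnapshotVersion); if nothing matches that default, newCfg falls
	// back to the highest v<N> present in the list. Passing a non-zero version
	// skips the filtering and records that version in the returned Cfg.
	// A hypothetical caller pinning a specific version would do:
	//
	//	snapcfg.SnapshotVersion(2)              // pin file names to v2
	//	cfg := snapcfg.KnownCfg("mainnet", 0)   // Preverified now holds only v2 entries
	//	_ = cfg.Version                         // 2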
- snapCfg := snapcfg.KnownCfg(chainName) + snapCfg := snapcfg.KnownCfg(chainName, 0) return &Cfg{Dirs: dirs, ChainName: chainName, ClientConfig: torrentConfig, DownloadSlots: downloadSlots, WebSeedUrls: webseedHttpProviders, WebSeedFiles: webseedFileProviders, WebSeedS3Tokens: webseedS3Providers, - DownloadTorrentFilesFromWebseed: true, ExpectedTorrentFilesHashes: snapCfg.Preverified, + DownloadTorrentFilesFromWebseed: true, AddTorrentsFromDisk: true, ExpectedTorrentFilesHashes: snapCfg.Preverified, }, nil } func getIpv6Enabled() bool { if runtime.GOOS == "linux" { - file, err := ioutil.ReadFile("/sys/module/ipv6/parameters/disable") + file, err := os.ReadFile("/sys/module/ipv6/parameters/disable") if err != nil { log.Warn("could not read /sys/module/ipv6/parameters/disable for ipv6 detection") return false diff --git a/erigon-lib/downloader/rclone.go b/erigon-lib/downloader/rclone.go new file mode 100644 index 00000000000..4f43eaba6fd --- /dev/null +++ b/erigon-lib/downloader/rclone.go @@ -0,0 +1,783 @@ +package downloader + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "io/fs" + "net" + "net/http" + "os" + "os/exec" + "os/signal" + "path/filepath" + "strconv" + "strings" + "sync" + "sync/atomic" + "syscall" + "time" + + "golang.org/x/exp/slices" + + "github.com/ledgerwatch/erigon-lib/common/dbg" + "github.com/ledgerwatch/erigon-lib/downloader/snaptype" + "github.com/ledgerwatch/log/v3" + "github.com/spaolacci/murmur3" + "golang.org/x/sync/errgroup" +) + +type rcloneInfo struct { + sync.Mutex + file string + snapInfo *snaptype.FileInfo + remoteInfo remoteInfo + localInfo fs.FileInfo +} + +func (i *rcloneInfo) Version() uint8 { + if i.snapInfo != nil { + return i.snapInfo.Version + } + + return 0 +} + +func (i *rcloneInfo) From() uint64 { + if i.snapInfo != nil { + return i.snapInfo.From + } + + return 0 +} + +func (i *rcloneInfo) To() uint64 { + if i.snapInfo != nil { + return i.snapInfo.To + } + + return 0 +} + +func (i *rcloneInfo) Type() snaptype.Type { + if i.snapInfo != nil { + return i.snapInfo.T + } + + return snaptype.Unknown +} + +type RCloneClient struct { + rclone *exec.Cmd + rcloneUrl string + rcloneSession *http.Client + logger log.Logger +} + +func (c *RCloneClient) start(logger log.Logger) error { + c.logger = logger + + rclone, _ := exec.LookPath("rclone") + + if len(rclone) == 0 { + logger.Warn("[rclone] Uploading disabled: rclone not found in PATH") + return fmt.Errorf("rclone not found in PATH") + } + + if p, err := freePort(); err == nil { + ctx, cancel := context.WithCancel(context.Background()) + + addr := fmt.Sprintf("127.0.0.1:%d", p) + c.rclone = exec.CommandContext(ctx, rclone, "rcd", "--rc-addr", addr, "--rc-no-auth") + c.rcloneUrl = "http://" + addr + c.rcloneSession = &http.Client{} // no timeout - we're doing sync calls + + if err := c.rclone.Start(); err != nil { + cancel() + logger.Warn("[rclone] Uploading disabled: rclone didn't start", "err", err) + return fmt.Errorf("rclone didn't start: %w", err) + } else { + logger.Info("[rclone] rclone started", "addr", addr) + } + + go func() { + signalCh := make(chan os.Signal, 1) + signal.Notify(signalCh, syscall.SIGTERM, syscall.SIGINT) + + switch s := <-signalCh; s { + case syscall.SIGTERM, syscall.SIGINT: + cancel() + } + }() + } + + return nil +} + +func (c *RCloneClient) ListRemotes(ctx context.Context) ([]string, error) { + result, err := c.cmd(ctx, "config/listremotes", nil) + + if err != nil { + return nil, err + } + + remotes := struct { + Remotes []string `json:"remotes"` + 
}{} + + err = json.Unmarshal(result, &remotes) + + if err != nil { + return nil, err + } + + return remotes.Remotes, nil +} + +func (u *RCloneClient) sync(ctx context.Context, request *rcloneRequest) error { + _, err := u.cmd(ctx, "sync/sync", request) + return err +} + +/* +return retryConnects(ctx, func(ctx context.Context) error { + return client.CallContext(ctx, result, string(method), args...) +}) +} +*/ + +func isConnectionError(err error) bool { + var opErr *net.OpError + if errors.As(err, &opErr) { + return opErr.Op == "dial" + } + return false +} + +const connectionTimeout = time.Second * 5 + +func retry(ctx context.Context, op func(context.Context) error, isRecoverableError func(error) bool, delay time.Duration, lastErr error) error { + err := op(ctx) + if err == nil { + return nil + } + if errors.Is(err, context.DeadlineExceeded) && lastErr != nil { + return lastErr + } + if !isRecoverableError(err) { + return err + } + + delayTimer := time.NewTimer(delay) + select { + case <-delayTimer.C: + return retry(ctx, op, isRecoverableError, delay, err) + case <-ctx.Done(): + if errors.Is(ctx.Err(), context.DeadlineExceeded) { + return err + } + return ctx.Err() + } +} + +func (u *RCloneClient) cmd(ctx context.Context, path string, args interface{}) ([]byte, error) { + requestBody, err := json.Marshal(args) + + if err != nil { + return nil, err + } + + request, err := http.NewRequestWithContext(ctx, http.MethodPost, + u.rcloneUrl+"/"+path, bytes.NewBuffer(requestBody)) + + if err != nil { + return nil, err + } + + request.Header.Set("Content-Type", "application/json") + + ctx, cancel := context.WithTimeout(ctx, connectionTimeout) + defer cancel() + + var response *http.Response + + err = retry(ctx, func(ctx context.Context) error { + response, err = u.rcloneSession.Do(request) //nolint:bodyclose + return err + }, isConnectionError, time.Millisecond*200, nil) + + if err != nil { + return nil, err + } + + defer response.Body.Close() + + if response.StatusCode != http.StatusOK { + responseBody := struct { + Error string `json:"error"` + }{} + + if err := json.NewDecoder(response.Body).Decode(&responseBody); err == nil && len(responseBody.Error) > 0 { + u.logger.Warn("[rclone] cmd failed", "path", path, "status", response.Status, "err", responseBody.Error) + return nil, fmt.Errorf("cmd: %s failed: %s: %s", path, response.Status, responseBody.Error) + } else { + u.logger.Warn("[rclone] cmd failed", "path", path, "status", response.Status) + return nil, fmt.Errorf("cmd: %s failed: %s", path, response.Status) + } + } + + return io.ReadAll(response.Body) +} + +type RCloneSession struct { + *RCloneClient + sync.Mutex + files map[string]*rcloneInfo + oplock sync.Mutex + remoteFs string + localFs string + syncQueue chan syncRequest + syncScheduled atomic.Bool + activeSyncCount atomic.Int32 + cancel context.CancelFunc +} + +var rcClient RCloneClient +var rcClientStart sync.Once + +func NewRCloneClient(logger log.Logger) (*RCloneClient, error) { + var err error + + rcClientStart.Do(func() { + err = rcClient.start(logger) + }) + + if err != nil { + return nil, err + } + + return &rcClient, nil +} + +func freePort() (port int, err error) { + if a, err := net.ResolveTCPAddr("tcp", "127.0.0.1:0"); err != nil { + return 0, err + } else { + if l, err := net.ListenTCP("tcp", a); err != nil { + return 0, err + } else { + defer l.Close() + return l.Addr().(*net.TCPAddr).Port, nil + } + } +} + +func (c *RCloneClient) NewSession(ctx context.Context, localFs string, remoteFs string) (*RCloneSession, error) { + 
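	// Illustrative note, not part of this diff: a session pairs a local directory
	// with an rclone remote. Construction below derives a cancellable context,
	// then starts a background goroutine that lists the remote and drains the
	// session's sync queue, so callers can queue uploads/downloads immediately.
	// A sketch of external usage (remote name taken from rclone_test.go, local
	// path hypothetical, errors elided for brevity):
	//
	//	cli, _ := downloader.NewRCloneClient(logger)
	//	s, _ := cli.NewSession(ctx, "/tmp/snapshots", "r2:erigon-v2-snapshots-bor-mainnet")
	//	defer s.Stop()
	//	_ = s.Download(ctx, "manifest.txt") // blocks until the queued sync request completes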
ctx, cancel := context.WithCancel(ctx) + + session := &RCloneSession{ + RCloneClient: c, + files: map[string]*rcloneInfo{}, + remoteFs: remoteFs, + localFs: localFs, + cancel: cancel, + syncQueue: make(chan syncRequest, 100), + } + + go func() { + if _, err := session.ReadRemoteDir(ctx, true); err == nil { + session.syncFiles(ctx) + } + }() + + return session, nil +} + +func (c *RCloneSession) RemoteFsRoot() string { + return c.remoteFs +} + +func (c *RCloneSession) LocalFsRoot() string { + return c.localFs +} + +func (c *RCloneSession) Stop() { + c.cancel() +} + +type syncRequest struct { + ctx context.Context + info map[string]*rcloneInfo + cerr chan error + request *rcloneRequest + retryTime time.Duration +} + +func (c *RCloneSession) Upload(ctx context.Context, files ...string) error { + c.Lock() + + reqInfo := map[string]*rcloneInfo{} + + for _, file := range files { + info, ok := c.files[file] + + if !ok || info.localInfo == nil { + localInfo, err := os.Stat(filepath.Join(c.localFs, file)) + + if err != nil { + c.Unlock() + return fmt.Errorf("can't upload: %s: %w", file, err) + } + + if !localInfo.Mode().IsRegular() || localInfo.Size() == 0 { + c.Unlock() + return fmt.Errorf("can't upload: %s: %s", file, "file is not uploadable") + } + + if ok { + info.localInfo = localInfo + } else { + info := &rcloneInfo{ + file: file, + localInfo: localInfo, + } + + if snapInfo, ok := snaptype.ParseFileName(c.localFs, file); ok { + info.snapInfo = &snapInfo + } + + c.files[file] = info + } + } else { + reqInfo[file] = info + } + } + + c.Unlock() + + cerr := make(chan error, 1) + + c.syncQueue <- syncRequest{ctx, reqInfo, cerr, + &rcloneRequest{ + Group: c.Label(), + SrcFs: c.localFs, + DstFs: c.remoteFs, + Filter: rcloneFilter{ + IncludeRule: files, + }}, 0} + + return <-cerr +} + +func (c *RCloneSession) Download(ctx context.Context, files ...string) error { + c.Lock() + + if len(c.files) == 0 { + c.Unlock() + _, err := c.ReadRemoteDir(ctx, false) + if err != nil { + return fmt.Errorf("can't download: %s: %w", files, err) + } + c.Lock() + } + + reqInfo := map[string]*rcloneInfo{} + + for _, file := range files { + info, ok := c.files[file] + + if !ok || info.remoteInfo.Size == 0 { + c.Unlock() + return fmt.Errorf("can't download: %s: %w", file, os.ErrNotExist) + } + + reqInfo[file] = info + } + + c.Unlock() + + cerr := make(chan error, 1) + + c.syncQueue <- syncRequest{ctx, reqInfo, cerr, + &rcloneRequest{ + SrcFs: c.remoteFs, + DstFs: c.localFs, + Filter: rcloneFilter{ + IncludeRule: files, + }}, 0} + + return <-cerr +} + +func (c *RCloneSession) Cat(ctx context.Context, file string) (io.Reader, error) { + rclone, err := exec.LookPath("rclone") + + if err != nil { + return nil, err + } + + cmd := exec.CommandContext(ctx, rclone, "cat", c.remoteFs+"/"+file) + + stdout, err := cmd.StdoutPipe() + + if err != nil { + return nil, err + } + + if err := cmd.Start(); err != nil { + return nil, err + } + + return stdout, nil +} + +func (c *RCloneSession) ReadLocalDir(ctx context.Context) ([]fs.DirEntry, error) { + return os.ReadDir(c.localFs) +} + +func (c *RCloneSession) Label() string { + return strconv.FormatUint(murmur3.Sum64([]byte(c.localFs+"<->"+c.remoteFs)), 36) +} + +type remoteInfo struct { + Name string + Size uint64 + ModTime time.Time +} + +type SnapInfo interface { + Version() uint8 + From() uint64 + To() uint64 + Type() snaptype.Type +} + +type fileInfo struct { + *rcloneInfo +} + +func (fi *fileInfo) Name() string { + return fi.file +} + +func (fi *fileInfo) Size() int64 { + return 
int64(fi.remoteInfo.Size) +} + +func (fi *fileInfo) Mode() fs.FileMode { + return fs.ModeIrregular +} + +func (fi *fileInfo) ModTime() time.Time { + return fi.remoteInfo.ModTime +} + +func (fi *fileInfo) IsDir() bool { + return false +} + +func (fi *fileInfo) Sys() any { + return fi.rcloneInfo +} + +type dirEntry struct { + info *fileInfo +} + +func (e dirEntry) Name() string { + return e.info.Name() +} + +func (e dirEntry) IsDir() bool { + return e.info.IsDir() +} + +func (e dirEntry) Type() fs.FileMode { + return e.info.Mode() +} + +func (e dirEntry) Info() (fs.FileInfo, error) { + return e.info, nil +} + +var ErrAccessDenied = errors.New("access denied") + +func (c *RCloneSession) ReadRemoteDir(ctx context.Context, refresh bool) ([]fs.DirEntry, error) { + if len(c.remoteFs) == 0 { + return nil, fmt.Errorf("remote fs undefined") + } + + c.oplock.Lock() + defer c.oplock.Unlock() + + c.Lock() + fileCount := len(c.files) + c.Unlock() + + if fileCount == 0 || refresh { + listBody, err := json.Marshal(struct { + Fs string `json:"fs"` + Remote string `json:"remote"` + }{ + Fs: c.remoteFs, + Remote: "", + }) + + if err != nil { + return nil, fmt.Errorf("can't marshal list request: %w", err) + } + + listRequest, err := http.NewRequestWithContext(ctx, http.MethodPost, + c.rcloneUrl+"/operations/list", bytes.NewBuffer(listBody)) + + if err != nil { + return nil, fmt.Errorf("can't create list request: %w", err) + } + + listRequest.Header.Set("Content-Type", "application/json") + + var response *http.Response + + for i := 0; i < 10; i++ { + response, err = c.rcloneSession.Do(listRequest) //nolint:bodyclose + if err == nil { + break + } + time.Sleep(2 * time.Second) + } + + if err != nil { + return nil, fmt.Errorf("can't get remote list: %w", err) + } + + defer response.Body.Close() + + if response.StatusCode != http.StatusOK { + body, _ := io.ReadAll(response.Body) + e := struct { + Error string `json:"error"` + }{} + + if err := json.Unmarshal(body, &e); err == nil { + if strings.Contains(e.Error, "AccessDenied") { + return nil, fmt.Errorf("can't get remote list: %w", ErrAccessDenied) + } + } + + return nil, fmt.Errorf("can't get remote list: %s: %s", response.Status, string(body)) + } + + responseBody := struct { + List []remoteInfo `json:"list"` + }{} + + if err := json.NewDecoder(response.Body).Decode(&responseBody); err != nil { + return nil, fmt.Errorf("can't decode remote list: %w", err) + } + + for _, fi := range responseBody.List { + localInfo, _ := os.Stat(filepath.Join(c.localFs, fi.Name)) + + c.Lock() + if rcinfo, ok := c.files[fi.Name]; ok { + rcinfo.localInfo = localInfo + rcinfo.remoteInfo = fi + + if snapInfo, ok := snaptype.ParseFileName(c.localFs, fi.Name); ok { + rcinfo.snapInfo = &snapInfo + } else { + rcinfo.snapInfo = nil + } + + } else { + info := &rcloneInfo{ + file: fi.Name, + localInfo: localInfo, + remoteInfo: fi, + } + + if snapInfo, ok := snaptype.ParseFileName(c.localFs, fi.Name); ok { + info.snapInfo = &snapInfo + } + + c.files[fi.Name] = info + } + c.Unlock() + } + } + + var entries = make([]fs.DirEntry, 0, len(c.files)) + + for _, info := range c.files { + if info.remoteInfo.Size > 0 { + entries = append(entries, &dirEntry{&fileInfo{info}}) + } + } + + slices.SortFunc(entries, func(a, b fs.DirEntry) int { + return strings.Compare(a.Name(), b.Name()) + }) + + return entries, nil +} + +type rcloneFilter struct { + IncludeRule []string `json:"IncludeRule"` +} + +type rcloneRequest struct { + Async bool `json:"_async,omitempty"` + Config map[string]interface{} 
`json:"_config,omitempty"` + Group string `json:"group"` + SrcFs string `json:"srcFs"` + DstFs string `json:"dstFs"` + Filter rcloneFilter `json:"_filter"` +} + +func (c *RCloneSession) syncFiles(ctx context.Context) { + if !c.syncScheduled.CompareAndSwap(false, true) { + return + } + + g, gctx := errgroup.WithContext(ctx) + g.SetLimit(16) + + minRetryTime := 30 * time.Second + maxRetryTime := 300 * time.Second + + retry := func(request syncRequest) { + switch { + case request.retryTime == 0: + request.retryTime = minRetryTime + case request.retryTime < maxRetryTime: + request.retryTime += request.retryTime + default: + request.retryTime = maxRetryTime + } + + retryTimer := time.NewTicker(request.retryTime) + + select { + case <-request.ctx.Done(): + request.cerr <- request.ctx.Err() + return + case <-retryTimer.C: + } + + c.Lock() + syncQueue := c.syncQueue + c.Unlock() + + if syncQueue != nil { + syncQueue <- request + } else { + request.cerr <- fmt.Errorf("no sync queue available") + } + } + + go func() { + logEvery := time.NewTicker(20 * time.Second) + defer logEvery.Stop() + + select { + case <-gctx.Done(): + if syncCount := int(c.activeSyncCount.Load()) + len(c.syncQueue); syncCount > 0 { + log.Info("[rclone] Synced files", "processed", fmt.Sprintf("%d/%d", c.activeSyncCount.Load(), syncCount)) + } + + c.Lock() + syncQueue := c.syncQueue + c.syncQueue = nil + c.Unlock() + + if syncQueue != nil { + close(syncQueue) + } + + return + case <-logEvery.C: + if syncCount := int(c.activeSyncCount.Load()) + len(c.syncQueue); syncCount > 0 { + log.Info("[rclone] Syncing files", "progress", fmt.Sprintf("%d/%d", c.activeSyncCount.Load(), syncCount)) + } + } + }() + + go func() { + for req := range c.syncQueue { + + if gctx.Err() != nil { + req.cerr <- gctx.Err() + continue + } + + func(req syncRequest) { + g.Go(func() error { + c.activeSyncCount.Add(1) + + defer func() { + c.activeSyncCount.Add(-1) + if r := recover(); r != nil { + log.Error("[rclone] snapshot sync failed", "err", r, "stack", dbg.Stack()) + + if gctx.Err() != nil { + req.cerr <- gctx.Err() + } + + var err error + var ok bool + + if err, ok = r.(error); ok { + req.cerr <- fmt.Errorf("snapshot sync failed: %w", err) + } else { + req.cerr <- fmt.Errorf("snapshot sync failed: %s", r) + } + + return + } + }() + + if req.ctx.Err() != nil { + req.cerr <- req.ctx.Err() + return nil //nolint:nilerr + } + + if err := c.sync(gctx, req.request); err != nil { + + if gctx.Err() != nil { + req.cerr <- gctx.Err() + } else { + go retry(req) + } + + return nil //nolint:nilerr + } + + for _, info := range req.info { + localInfo, _ := os.Stat(filepath.Join(c.localFs, info.file)) + + info.Lock() + info.localInfo = localInfo + info.remoteInfo = remoteInfo{ + Name: info.file, + Size: uint64(localInfo.Size()), + ModTime: localInfo.ModTime(), + } + info.Unlock() + } + + req.cerr <- nil + return nil + }) + }(req) + } + + c.syncScheduled.Store(false) + + if err := g.Wait(); err != nil { + c.logger.Debug("[rclone] uploading failed", "err", err) + } + }() +} diff --git a/erigon-lib/downloader/rclone_test.go b/erigon-lib/downloader/rclone_test.go new file mode 100644 index 00000000000..b96f242fe06 --- /dev/null +++ b/erigon-lib/downloader/rclone_test.go @@ -0,0 +1,99 @@ +package downloader_test + +import ( + "context" + "errors" + "io" + "os" + "os/exec" + "testing" + + "github.com/ledgerwatch/erigon-lib/downloader" + "github.com/ledgerwatch/log/v3" +) + +func hasRClone() bool { + rclone, _ := exec.LookPath("rclone") + + if len(rclone) == 0 { + return false 
+ } + + return true +} + +func TestDownload(t *testing.T) { + if !hasRClone() { + t.Skip("rclone not available") + } + + ctx := context.Background() + + tmpDir := t.TempDir() + remoteDir := "r2:erigon-v2-snapshots-bor-mainnet" + + cli, err := downloader.NewRCloneClient(log.Root()) + + if err != nil { + t.Fatal(err) + } + + rcc, err := cli.NewSession(ctx, tmpDir, remoteDir) + + if err != nil { + t.Fatal(err) + } + + dir, err := rcc.ReadRemoteDir(ctx, true) + + if err != nil { + if errors.Is(err, downloader.ErrAccessDenied) { + t.Skip("rclone dir not accessible") + } + + t.Fatal(err) + } + + for _, entry := range dir { + if len(entry.Name()) == 0 { + t.Fatal("unexpected nil file name") + } + //fmt.Println(entry.Name()) + } + + err = rcc.Download(ctx, "manifest.txt") + + if err != nil { + t.Fatal(err) + } + + h0, err := os.ReadFile("manifest.txt") + + if err != nil { + t.Fatal(err) + } + + if len(h0) == 0 { + t.Fatal("unexpected nil file") + } + //fmt.Print(string(h0)) + + reader, err := rcc.Cat(ctx, "manifest.txt") + + if err != nil { + t.Fatal(err) + } + + h1, err := io.ReadAll(reader) + + if err != nil { + t.Fatal(err) + } + + if string(h0) != string(h1) { + t.Fatal("Download and Cat contents mismatched") + } + //fmt.Print(string(h1)) + + rcc.Stop() +} diff --git a/erigon-lib/downloader/snaptype/files.go b/erigon-lib/downloader/snaptype/files.go index 650ec203dcf..274c91bd35f 100644 --- a/erigon-lib/downloader/snaptype/files.go +++ b/erigon-lib/downloader/snaptype/files.go @@ -35,17 +35,15 @@ import ( type Type int const ( + Unknown Type = -1 Headers Type = iota Bodies Transactions BorEvents BorSpans - NumberOfTypes BeaconBlocks ) -var BorSnapshotTypes = []Type{BorEvents, BorSpans} - func (ft Type) String() string { switch ft { case Headers: @@ -80,7 +78,7 @@ func ParseFileType(s string) (Type, bool) { case "beaconblocks": return BeaconBlocks, true default: - return NumberOfTypes, false + return Unknown, false } } @@ -94,16 +92,25 @@ func (it IdxType) String() string { return string(it) } var BlockSnapshotTypes = []Type{Headers, Bodies, Transactions} +var BorSnapshotTypes = []Type{BorEvents, BorSpans} + var ( ErrInvalidFileName = fmt.Errorf("invalid compressed file name") ) -func FileName(from, to uint64, fileType string) string { - return fmt.Sprintf("v1-%06d-%06d-%s", from/1_000, to/1_000, fileType) +func FileName(version uint8, from, to uint64, fileType string) string { + return fmt.Sprintf("v%d-%06d-%06d-%s", version, from/1_000, to/1_000, fileType) +} + +func SegmentFileName(version uint8, from, to uint64, t Type) string { + return FileName(version, from, to, t.String()) + ".seg" +} +func DatFileName(version uint8, from, to uint64, fType string) string { + return FileName(version, from, to, fType) + ".dat" +} +func IdxFileName(version uint8, from, to uint64, fType string) string { + return FileName(version, from, to, fType) + ".idx" } -func SegmentFileName(from, to uint64, t Type) string { return FileName(from, to, t.String()) + ".seg" } -func DatFileName(from, to uint64, fType string) string { return FileName(from, to, fType) + ".dat" } -func IdxFileName(from, to uint64, fType string) string { return FileName(from, to, fType) + ".idx" } func FilterExt(in []FileInfo, expectExt string) (out []FileInfo) { for _, f := range in { @@ -114,8 +121,8 @@ func FilterExt(in []FileInfo, expectExt string) (out []FileInfo) { } return out } -func FilesWithExt(dir, expectExt string) ([]FileInfo, error) { - files, err := ParseDir(dir) +func FilesWithExt(dir string, version uint8, expectExt string) 
([]FileInfo, error) { + files, err := ParseDir(dir, version) if err != nil { return nil, err } @@ -139,8 +146,16 @@ func ParseFileName(dir, fileName string) (res FileInfo, ok bool) { if len(parts) < 4 { return res, ok } - version := parts[0] - _ = version + + var version uint8 + if len(parts[0]) > 1 && parts[0][0] == 'v' { + v, err := strconv.ParseUint(parts[0][1:], 10, 64) + if err != nil { + return + } + version = uint8(v) + } + from, err := strconv.ParseUint(parts[1], 10, 64) if err != nil { return @@ -153,7 +168,8 @@ func ParseFileName(dir, fileName string) (res FileInfo, ok bool) { if !ok { return res, ok } - return FileInfo{From: from * 1_000, To: to * 1_000, Path: filepath.Join(dir, fileName), T: ft, Ext: ext}, ok + + return FileInfo{Version: version, From: from * 1_000, To: to * 1_000, Path: filepath.Join(dir, fileName), T: ft, Ext: ext}, ok } const Erigon3SeedableSteps = 32 @@ -185,9 +201,13 @@ func (f FileInfo) Seedable() bool { func (f FileInfo) NeedTorrentFile() bool { return f.Seedable() && !f.TorrentFileExists() } func (f FileInfo) Name() string { return filepath.Base(f.Path) } -func IdxFiles(dir string) (res []FileInfo, err error) { return FilesWithExt(dir, ".idx") } -func Segments(dir string) (res []FileInfo, err error) { return FilesWithExt(dir, ".seg") } -func TmpFiles(dir string) (res []string, err error) { +func IdxFiles(dir string, version uint8) (res []FileInfo, err error) { + return FilesWithExt(dir, version, ".idx") +} +func Segments(dir string, version uint8) (res []FileInfo, err error) { + return FilesWithExt(dir, version, ".seg") +} +func TmpFiles(dir string, version uint8) (res []string, err error) { files, err := os.ReadDir(dir) if err != nil { if errors.Is(err, os.ErrNotExist) { @@ -195,20 +215,24 @@ func TmpFiles(dir string) (res []string, err error) { } return nil, err } + + v := fmt.Sprint("v", version) + for _, f := range files { - if f.IsDir() || len(f.Name()) < 3 { + if f.IsDir() || len(f.Name()) < 3 || !strings.HasPrefix(f.Name(), v) { continue } if filepath.Ext(f.Name()) != ".tmp" { continue } + res = append(res, filepath.Join(dir, f.Name())) } return res, nil } // ParseDir - reading dir ( -func ParseDir(dir string) (res []FileInfo, err error) { +func ParseDir(dir string, version uint8) (res []FileInfo, err error) { files, err := os.ReadDir(dir) if err != nil { if errors.Is(err, os.ErrNotExist) { @@ -216,12 +240,15 @@ func ParseDir(dir string) (res []FileInfo, err error) { } return nil, err } + + v := fmt.Sprint("v", version) + for _, f := range files { fileInfo, err := f.Info() if err != nil { return nil, err } - if f.IsDir() || fileInfo.Size() == 0 || len(f.Name()) < 3 { + if f.IsDir() || fileInfo.Size() == 0 || len(f.Name()) < 3 || !strings.HasPrefix(f.Name(), v) { continue } diff --git a/erigon-lib/downloader/util.go b/erigon-lib/downloader/util.go index cd5bc26d8dd..02437c38e5f 100644 --- a/erigon-lib/downloader/util.go +++ b/erigon-lib/downloader/util.go @@ -155,6 +155,7 @@ func BuildTorrentIfNeed(ctx context.Context, fName, root string, torrentFiles *T if torrentFiles.Exists(fName) { return nil } + fPath := filepath.Join(root, fName) if !dir2.FileExist(fPath) { return nil diff --git a/erigon-lib/recsplit/index.go b/erigon-lib/recsplit/index.go index 277db2d5fdf..c10fa0205d4 100644 --- a/erigon-lib/recsplit/index.go +++ b/erigon-lib/recsplit/index.go @@ -178,6 +178,7 @@ func (idx *Index) ModTime() time.Time { return idx.modTime } func (idx *Index) BaseDataID() uint64 { return idx.baseDataID } func (idx *Index) FilePath() string { return 
idx.filePath } func (idx *Index) FileName() string { return idx.fileName } +func (idx *Index) IsOpen() bool { return idx != nil && idx.f != nil } func (idx *Index) Close() { if idx == nil { diff --git a/erigon-lib/recsplit/recsplit.go b/erigon-lib/recsplit/recsplit.go index a019ca9b3f9..fc41a824c9b 100644 --- a/erigon-lib/recsplit/recsplit.go +++ b/erigon-lib/recsplit/recsplit.go @@ -556,6 +556,9 @@ func (rs *RecSplit) Build(ctx context.Context) error { if rs.indexF, err = os.Create(rs.tmpFilePath); err != nil { return fmt.Errorf("create index file %s: %w", rs.indexFile, err) } + + rs.logger.Debug("[index] created", "file", rs.tmpFilePath, "fs", rs.indexF) + defer rs.indexF.Close() rs.indexW = bufio.NewWriterSize(rs.indexF, etl.BufIOSize) // Write minimal app-specific dataID in this index file @@ -680,9 +683,12 @@ func (rs *RecSplit) Build(ctx context.Context) error { if err = rs.indexF.Close(); err != nil { return err } + if err = os.Rename(rs.tmpFilePath, rs.indexFile); err != nil { + rs.logger.Warn("[index] rename", "file", rs.tmpFilePath, "err", err) return err } + return nil } diff --git a/eth/backend.go b/eth/backend.go index b466b7450ef..ff77bfee98f 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -33,6 +33,7 @@ import ( lru "github.com/hashicorp/golang-lru/arc/v2" "github.com/ledgerwatch/erigon-lib/chain/networkname" + "github.com/ledgerwatch/erigon-lib/chain/snapcfg" "github.com/ledgerwatch/erigon-lib/diagnostics" "github.com/ledgerwatch/erigon-lib/downloader/downloadergrpc" "github.com/ledgerwatch/erigon-lib/kv/kvcfg" @@ -73,7 +74,7 @@ import ( libcommon "github.com/ledgerwatch/erigon-lib/common" "github.com/ledgerwatch/erigon-lib/common/datadir" "github.com/ledgerwatch/erigon-lib/direct" - downloader3 "github.com/ledgerwatch/erigon-lib/downloader" + downloader "github.com/ledgerwatch/erigon-lib/downloader" "github.com/ledgerwatch/erigon-lib/downloader/downloadercfg" proto_downloader "github.com/ledgerwatch/erigon-lib/gointerfaces/downloader" "github.com/ledgerwatch/erigon-lib/gointerfaces/grpcutil" @@ -193,7 +194,7 @@ type Ethereum struct { txPoolGrpcServer txpool_proto.TxpoolServer notifyMiningAboutNewTxs chan struct{} forkValidator *engine_helpers.ForkValidator - downloader *downloader3.Downloader + downloader *downloader.Downloader agg *libstate.AggregatorV3 blockSnapshots *freezeblocks.RoSnapshots @@ -319,9 +320,11 @@ func New(ctx context.Context, stack *node.Node, config *ethconfig.Config, logger logger.Info("Initialised chain configuration", "config", chainConfig, "genesis", genesis.Hash()) + snapshotVersion := snapcfg.KnownCfg(chainConfig.ChainName, 0).Version + // Check if we have an already initialized chain and fall back to // that if so. Otherwise we need to generate a new genesis spec. 
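// Aside: the snapshot version is resolved once here from the chain's known
// snapshot config and then threaded through every snapshot consumer (block
// reader, Ro/Bor snapshots, caplin), roughly:
//
//	snapshotVersion := snapcfg.KnownCfg(chainConfig.ChainName, 0).Version
//	allSnapshots := freezeblocks.NewRoSnapshots(config.Snapshot, dirs.Snap, snapshotVersion, logger)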
- blockReader, blockWriter, allSnapshots, agg, err := setUpBlockReader(ctx, chainKv, config.Dirs, config.Snapshot, config.HistoryV3, chainConfig.Bor != nil, logger) + blockReader, blockWriter, allSnapshots, agg, err := setUpBlockReader(ctx, chainKv, config.Dirs, snapshotVersion, config.Snapshot, config.HistoryV3, chainConfig.Bor != nil, logger) if err != nil { return nil, err } @@ -630,9 +633,10 @@ func New(ctx context.Context, stack *node.Node, config *ethconfig.Config, logger } // proof-of-work mining mining := stagedsync.New( + config.Sync, stagedsync.MiningStages(backend.sentryCtx, stagedsync.StageMiningCreateBlockCfg(backend.chainDB, miner, *backend.chainConfig, backend.engine, backend.txPoolDB, nil, tmpdir, backend.blockReader), - stagedsync.StageBorHeimdallCfg(backend.chainDB, snapDb, miner, *backend.chainConfig, heimdallClient, backend.blockReader, nil, nil, recents, signatures), + stagedsync.StageBorHeimdallCfg(backend.chainDB, snapDb, miner, *backend.chainConfig, heimdallClient, backend.blockReader, nil, nil, nil, recents, signatures), stagedsync.StageMiningExecCfg(backend.chainDB, miner, backend.notifications.Events, *backend.chainConfig, backend.engine, &vm.Config{}, tmpdir, nil, 0, backend.txPool, backend.txPoolDB, blockReader), stagedsync.StageHashStateCfg(backend.chainDB, dirs, config.HistoryV3), stagedsync.StageTrieCfg(backend.chainDB, false, true, true, tmpdir, blockReader, nil, config.HistoryV3, backend.agg), @@ -650,9 +654,10 @@ func New(ctx context.Context, stack *node.Node, config *ethconfig.Config, logger miningStatePos := stagedsync.NewProposingState(&config.Miner) miningStatePos.MiningConfig.Etherbase = param.SuggestedFeeRecipient proposingSync := stagedsync.New( + config.Sync, stagedsync.MiningStages(backend.sentryCtx, stagedsync.StageMiningCreateBlockCfg(backend.chainDB, miningStatePos, *backend.chainConfig, backend.engine, backend.txPoolDB, param, tmpdir, backend.blockReader), - stagedsync.StageBorHeimdallCfg(backend.chainDB, snapDb, miningStatePos, *backend.chainConfig, heimdallClient, backend.blockReader, nil, nil, recents, signatures), + stagedsync.StageBorHeimdallCfg(backend.chainDB, snapDb, miningStatePos, *backend.chainConfig, heimdallClient, backend.blockReader, nil, nil, nil, recents, signatures), stagedsync.StageMiningExecCfg(backend.chainDB, miningStatePos, backend.notifications.Events, *backend.chainConfig, backend.engine, &vm.Config{}, tmpdir, interrupt, param.PayloadId, backend.txPool, backend.txPoolDB, blockReader), stagedsync.StageHashStateCfg(backend.chainDB, dirs, config.HistoryV3), stagedsync.StageTrieCfg(backend.chainDB, false, true, true, tmpdir, blockReader, nil, config.HistoryV3, backend.agg), @@ -788,13 +793,13 @@ func New(ctx context.Context, stack *node.Node, config *ethconfig.Config, logger blockReader, blockRetire, backend.agg, backend.silkworm, backend.forkValidator, heimdallClient, recents, signatures, logger) backend.syncUnwindOrder = stagedsync.DefaultUnwindOrder backend.syncPruneOrder = stagedsync.DefaultPruneOrder - backend.stagedSync = stagedsync.New(backend.syncStages, backend.syncUnwindOrder, backend.syncPruneOrder, logger) + backend.stagedSync = stagedsync.New(config.Sync, backend.syncStages, backend.syncUnwindOrder, backend.syncPruneOrder, logger) hook := stages2.NewHook(backend.sentryCtx, backend.chainDB, backend.notifications, backend.stagedSync, backend.blockReader, backend.chainConfig, backend.logger, backend.sentriesClient.UpdateHead) checkStateRoot := true - pipelineStages := stages2.NewPipelineStages(ctx, chainKv, 
config, backend.sentriesClient, backend.notifications, backend.downloaderClient, blockReader, blockRetire, backend.agg, backend.silkworm, backend.forkValidator, logger, checkStateRoot) - backend.pipelineStagedSync = stagedsync.New(pipelineStages, stagedsync.PipelineUnwindOrder, stagedsync.PipelinePruneOrder, logger) + pipelineStages := stages2.NewPipelineStages(ctx, chainKv, config, stack.Config().P2P, backend.sentriesClient, backend.notifications, backend.downloaderClient, blockReader, blockRetire, backend.agg, backend.silkworm, backend.forkValidator, logger, checkStateRoot) + backend.pipelineStagedSync = stagedsync.New(config.Sync, pipelineStages, stagedsync.PipelineUnwindOrder, stagedsync.PipelinePruneOrder, logger) backend.eth1ExecutionServer = eth1.NewEthereumExecutionModule(blockReader, chainKv, backend.pipelineStagedSync, backend.forkValidator, chainConfig, assembleBlockPOS, hook, backend.notifications.Accumulator, backend.notifications.StateChangesConsumer, logger, backend.engine, config.HistoryV3) executionRpc := direct.NewExecutionClientDirect(backend.eth1ExecutionServer) engineBackendRPC := engineapi.NewEngineServer( @@ -875,7 +880,7 @@ func New(ctx context.Context, stack *node.Node, config *ethconfig.Config, logger go func() { eth1Getter := getters.NewExecutionSnapshotReader(ctx, beaconCfg, blockReader, backend.chainDB) - if err := caplin1.RunCaplinPhase1(ctx, client, engine, beaconCfg, genesisCfg, state, nil, dirs, config.BeaconRouter, eth1Getter, backend.downloaderClient, config.CaplinConfig.Backfilling, config.CaplinConfig.Archive, historyDB, indiciesDB); err != nil { + if err := caplin1.RunCaplinPhase1(ctx, client, engine, beaconCfg, genesisCfg, state, nil, dirs, snapshotVersion, config.BeaconRouter, eth1Getter, backend.downloaderClient, config.CaplinConfig.Backfilling, config.CaplinConfig.Archive, historyDB, indiciesDB); err != nil { logger.Error("could not start caplin", "err", err) } ctxCancel() @@ -1200,13 +1205,17 @@ func (s *Ethereum) setUpSnapDownloader(ctx context.Context, downloaderCfg *downl s.downloaderClient, err = downloadergrpc.NewClient(ctx, s.config.Snapshot.DownloaderAddr) } else { // start embedded Downloader + if uploadFs := s.config.Sync.UploadLocation; len(uploadFs) > 0 { + downloaderCfg.AddTorrentsFromDisk = false + } + discover := true - s.downloader, err = downloader3.New(ctx, downloaderCfg, s.config.Dirs, s.logger, log.LvlDebug, discover) + s.downloader, err = downloader.New(ctx, downloaderCfg, s.config.Dirs, s.logger, log.LvlDebug, discover) if err != nil { return err } s.downloader.MainLoopInBackground(true) - bittorrentServer, err := downloader3.NewGrpcServer(s.downloader) + bittorrentServer, err := downloader.NewGrpcServer(s.downloader) if err != nil { return fmt.Errorf("new server: %w", err) } @@ -1231,14 +1240,21 @@ func (s *Ethereum) setUpSnapDownloader(ctx context.Context, downloaderCfg *downl return err } -func setUpBlockReader(ctx context.Context, db kv.RwDB, dirs datadir.Dirs, snConfig ethconfig.BlocksFreezing, histV3 bool, isBor bool, logger log.Logger) (services.FullBlockReader, *blockio.BlockWriter, *freezeblocks.RoSnapshots, *libstate.AggregatorV3, error) { - allSnapshots := freezeblocks.NewRoSnapshots(snConfig, dirs.Snap, logger) +func setUpBlockReader(ctx context.Context, db kv.RwDB, dirs datadir.Dirs, snashotVersion uint8, snConfig ethconfig.BlocksFreezing, histV3 bool, isBor bool, logger log.Logger) (services.FullBlockReader, *blockio.BlockWriter, *freezeblocks.RoSnapshots, *libstate.AggregatorV3, error) { + allSnapshots := 
freezeblocks.NewRoSnapshots(snConfig, dirs.Snap, snashotVersion, logger) + var allBorSnapshots *freezeblocks.BorRoSnapshots if isBor { - allBorSnapshots = freezeblocks.NewBorRoSnapshots(snConfig, dirs.Snap, logger) + allBorSnapshots = freezeblocks.NewBorRoSnapshots(snConfig, dirs.Snap, snashotVersion, logger) } + var err error - if !snConfig.NoDownloader { + if snConfig.NoDownloader { + allSnapshots.ReopenFolder() + if isBor { + allBorSnapshots.ReopenFolder() + } + } else { allSnapshots.OptimisticalyReopenWithDB(db) if isBor { allBorSnapshots.OptimisticalyReopenWithDB(db) diff --git a/eth/ethconfig/config.go b/eth/ethconfig/config.go index bbb523044b6..61274a41939 100644 --- a/eth/ethconfig/config.go +++ b/eth/ethconfig/config.go @@ -42,6 +42,7 @@ import ( "github.com/ledgerwatch/erigon/eth/gasprice/gaspricecfg" "github.com/ledgerwatch/erigon/ethdb/prune" "github.com/ledgerwatch/erigon/params" + "github.com/ledgerwatch/erigon/rpc" ) // AggregationStep number of transactions in smallest static file @@ -77,6 +78,7 @@ var Defaults = Config{ ReconWorkerCount: estimate.ReconstituteState.Workers(), BodyCacheLimit: 256 * 1024 * 1024, BodyDownloadTimeoutSeconds: 2, + PruneLimit: 100, }, Ethash: ethashcfg.Config{ CachesInMem: 2, @@ -269,6 +271,13 @@ type Sync struct { BodyCacheLimit datasize.ByteSize BodyDownloadTimeoutSeconds int // TODO: change to duration + PruneLimit int //the maxumum records to delete from the DB during pruning + BreakAfterStage string + LoopBlockLimit uint + + UploadLocation string + UploadFrom rpc.BlockNumber + FrozenBlockLimit uint64 } // Chains where snapshots are enabled by default diff --git a/eth/stagedsync/default_stages.go b/eth/stagedsync/default_stages.go index 7afee387c1c..a882729abdc 100644 --- a/eth/stagedsync/default_stages.go +++ b/eth/stagedsync/default_stages.go @@ -40,7 +40,7 @@ func DefaultStages(ctx context.Context, return nil }, Prune: func(firstCycle bool, p *PruneState, tx kv.RwTx, logger log.Logger) error { - return SnapshotsPrune(p, firstCycle, snapshots, ctx, tx) + return SnapshotsPrune(p, firstCycle, snapshots, ctx, tx, logger) }, }, { @@ -266,7 +266,7 @@ func PipelineStages(ctx context.Context, snapshots SnapshotsCfg, blockHashCfg Bl return nil }, Prune: func(firstCycle bool, p *PruneState, tx kv.RwTx, logger log.Logger) error { - return SnapshotsPrune(p, firstCycle, snapshots, ctx, tx) + return SnapshotsPrune(p, firstCycle, snapshots, ctx, tx, logger) }, }, { @@ -430,6 +430,215 @@ func PipelineStages(ctx context.Context, snapshots SnapshotsCfg, blockHashCfg Bl } } +// when uploading - potentially from zero we need to include headers and bodies stages otherwise we won't recover the POW portion of the chain +func UploaderPipelineStages(ctx context.Context, snapshots SnapshotsCfg, headers HeadersCfg, blockHashCfg BlockHashesCfg, senders SendersCfg, bodies BodiesCfg, exec ExecuteBlockCfg, hashState HashStateCfg, trieCfg TrieCfg, history HistoryCfg, logIndex LogIndexCfg, callTraces CallTracesCfg, txLookup TxLookupCfg, finish FinishCfg, test bool) []*Stage { + return []*Stage{ + { + ID: stages.Snapshots, + Description: "Download snapshots", + Forward: func(firstCycle bool, badBlockUnwind bool, s *StageState, u Unwinder, tx kv.RwTx, logger log.Logger) error { + if badBlockUnwind { + return nil + } + return SpawnStageSnapshots(s, ctx, tx, snapshots, firstCycle, logger) + }, + Unwind: func(firstCycle bool, u *UnwindState, s *StageState, tx kv.RwTx, logger log.Logger) error { + return nil + }, + Prune: func(firstCycle bool, p *PruneState, tx kv.RwTx, logger 
log.Logger) error { + return SnapshotsPrune(p, firstCycle, snapshots, ctx, tx, logger) + }, + }, + { + ID: stages.Headers, + Description: "Download headers", + Forward: func(firstCycle bool, badBlockUnwind bool, s *StageState, u Unwinder, tx kv.RwTx, logger log.Logger) error { + if badBlockUnwind { + return nil + } + return SpawnStageHeaders(s, u, ctx, tx, headers, firstCycle, test, logger) + }, + Unwind: func(firstCycle bool, u *UnwindState, s *StageState, tx kv.RwTx, logger log.Logger) error { + return HeadersUnwind(u, s, tx, headers, test) + }, + Prune: func(firstCycle bool, p *PruneState, tx kv.RwTx, logger log.Logger) error { + return nil + }, + }, + { + ID: stages.BlockHashes, + Description: "Write block hashes", + Forward: func(firstCycle bool, badBlockUnwind bool, s *StageState, u Unwinder, tx kv.RwTx, logger log.Logger) error { + return SpawnBlockHashStage(s, tx, blockHashCfg, ctx, logger) + }, + Unwind: func(firstCycle bool, u *UnwindState, s *StageState, tx kv.RwTx, logger log.Logger) error { + return UnwindBlockHashStage(u, tx, blockHashCfg, ctx) + }, + Prune: func(firstCycle bool, p *PruneState, tx kv.RwTx, logger log.Logger) error { + return PruneBlockHashStage(p, tx, blockHashCfg, ctx) + }, + }, + { + ID: stages.Bodies, + Description: "Download block bodies", + Forward: func(firstCycle bool, badBlockUnwind bool, s *StageState, u Unwinder, tx kv.RwTx, logger log.Logger) error { + return BodiesForward(s, u, ctx, tx, bodies, test, firstCycle, logger) + }, + Unwind: func(firstCycle bool, u *UnwindState, s *StageState, tx kv.RwTx, logger log.Logger) error { + return UnwindBodiesStage(u, tx, bodies, ctx) + }, + Prune: func(firstCycle bool, p *PruneState, tx kv.RwTx, logger log.Logger) error { + return nil + }, + }, + { + ID: stages.Senders, + Description: "Recover senders from tx signatures", + Forward: func(firstCycle bool, badBlockUnwind bool, s *StageState, u Unwinder, tx kv.RwTx, logger log.Logger) error { + return SpawnRecoverSendersStage(senders, s, u, tx, 0, ctx, logger) + }, + Unwind: func(firstCycle bool, u *UnwindState, s *StageState, tx kv.RwTx, logger log.Logger) error { + return UnwindSendersStage(u, tx, senders, ctx) + }, + Prune: func(firstCycle bool, p *PruneState, tx kv.RwTx, logger log.Logger) error { + return PruneSendersStage(p, tx, senders, ctx) + }, + }, + { + ID: stages.Execution, + Description: "Execute blocks w/o hash checks", + Forward: func(firstCycle bool, badBlockUnwind bool, s *StageState, u Unwinder, tx kv.RwTx, logger log.Logger) error { + return SpawnExecuteBlocksStage(s, u, tx, 0, ctx, exec, firstCycle, logger) + }, + Unwind: func(firstCycle bool, u *UnwindState, s *StageState, tx kv.RwTx, logger log.Logger) error { + return UnwindExecutionStage(u, s, tx, ctx, exec, firstCycle, logger) + }, + Prune: func(firstCycle bool, p *PruneState, tx kv.RwTx, logger log.Logger) error { + return PruneExecutionStage(p, tx, exec, ctx, firstCycle) + }, + }, + { + ID: stages.HashState, + Description: "Hash the key in the state", + Disabled: exec.historyV3 && ethconfig.EnableHistoryV4InTest, + Forward: func(firstCycle bool, badBlockUnwind bool, s *StageState, u Unwinder, tx kv.RwTx, logger log.Logger) error { + return SpawnHashStateStage(s, tx, hashState, ctx, logger) + }, + Unwind: func(firstCycle bool, u *UnwindState, s *StageState, tx kv.RwTx, logger log.Logger) error { + return UnwindHashStateStage(u, s, tx, hashState, ctx, logger) + }, + Prune: func(firstCycle bool, p *PruneState, tx kv.RwTx, logger log.Logger) error { + return PruneHashStateStage(p, tx, 
hashState, ctx) + }, + }, + { + ID: stages.IntermediateHashes, + Description: "Generate intermediate hashes and computing state root", + Disabled: exec.historyV3 && ethconfig.EnableHistoryV4InTest, + Forward: func(firstCycle bool, badBlockUnwind bool, s *StageState, u Unwinder, tx kv.RwTx, logger log.Logger) error { + if exec.chainConfig.IsPrague(0) { + _, err := SpawnVerkleTrie(s, u, tx, trieCfg, ctx, logger) + return err + } + _, err := SpawnIntermediateHashesStage(s, u, tx, trieCfg, ctx, logger) + return err + }, + Unwind: func(firstCycle bool, u *UnwindState, s *StageState, tx kv.RwTx, logger log.Logger) error { + if exec.chainConfig.IsPrague(0) { + return UnwindVerkleTrie(u, s, tx, trieCfg, ctx, logger) + } + return UnwindIntermediateHashesStage(u, s, tx, trieCfg, ctx, logger) + }, + Prune: func(firstCycle bool, p *PruneState, tx kv.RwTx, logger log.Logger) error { + return PruneIntermediateHashesStage(p, tx, trieCfg, ctx) + }, + }, + { + ID: stages.CallTraces, + Description: "Generate call traces index", + DisabledDescription: "Work In Progress", + Disabled: exec.historyV3, + Forward: func(firstCycle bool, badBlockUnwind bool, s *StageState, u Unwinder, tx kv.RwTx, logger log.Logger) error { + return SpawnCallTraces(s, tx, callTraces, ctx, logger) + }, + Unwind: func(firstCycle bool, u *UnwindState, s *StageState, tx kv.RwTx, logger log.Logger) error { + return UnwindCallTraces(u, s, tx, callTraces, ctx, logger) + }, + Prune: func(firstCycle bool, p *PruneState, tx kv.RwTx, logger log.Logger) error { + return PruneCallTraces(p, tx, callTraces, ctx, logger) + }, + }, + { + ID: stages.AccountHistoryIndex, + Description: "Generate account history index", + Disabled: exec.historyV3, + Forward: func(firstCycle bool, badBlockUnwind bool, s *StageState, u Unwinder, tx kv.RwTx, logger log.Logger) error { + return SpawnAccountHistoryIndex(s, tx, history, ctx, logger) + }, + Unwind: func(firstCycle bool, u *UnwindState, s *StageState, tx kv.RwTx, logger log.Logger) error { + return UnwindAccountHistoryIndex(u, s, tx, history, ctx) + }, + Prune: func(firstCycle bool, p *PruneState, tx kv.RwTx, logger log.Logger) error { + return PruneAccountHistoryIndex(p, tx, history, ctx, logger) + }, + }, + { + ID: stages.StorageHistoryIndex, + Description: "Generate storage history index", + Disabled: exec.historyV3, + Forward: func(firstCycle bool, badBlockUnwind bool, s *StageState, u Unwinder, tx kv.RwTx, logger log.Logger) error { + return SpawnStorageHistoryIndex(s, tx, history, ctx, logger) + }, + Unwind: func(firstCycle bool, u *UnwindState, s *StageState, tx kv.RwTx, logger log.Logger) error { + return UnwindStorageHistoryIndex(u, s, tx, history, ctx) + }, + Prune: func(firstCycle bool, p *PruneState, tx kv.RwTx, logger log.Logger) error { + return PruneStorageHistoryIndex(p, tx, history, ctx, logger) + }, + }, + { + ID: stages.LogIndex, + Description: "Generate receipt logs index", + Disabled: exec.historyV3, + Forward: func(firstCycle bool, badBlockUnwind bool, s *StageState, u Unwinder, tx kv.RwTx, logger log.Logger) error { + return SpawnLogIndex(s, tx, logIndex, ctx, 0, logger) + }, + Unwind: func(firstCycle bool, u *UnwindState, s *StageState, tx kv.RwTx, logger log.Logger) error { + return UnwindLogIndex(u, s, tx, logIndex, ctx) + }, + Prune: func(firstCycle bool, p *PruneState, tx kv.RwTx, logger log.Logger) error { + return PruneLogIndex(p, tx, logIndex, ctx, logger) + }, + }, + { + ID: stages.TxLookup, + Description: "Generate tx lookup index", + Forward: func(firstCycle bool, 
badBlockUnwind bool, s *StageState, u Unwinder, tx kv.RwTx, logger log.Logger) error { + return SpawnTxLookup(s, tx, 0 /* toBlock */, txLookup, ctx, logger) + }, + Unwind: func(firstCycle bool, u *UnwindState, s *StageState, tx kv.RwTx, logger log.Logger) error { + return UnwindTxLookup(u, s, tx, txLookup, ctx, logger) + }, + Prune: func(firstCycle bool, p *PruneState, tx kv.RwTx, logger log.Logger) error { + return PruneTxLookup(p, tx, txLookup, ctx, firstCycle, logger) + }, + }, + { + ID: stages.Finish, + Description: "Final: update current block for the RPC API", + Forward: func(firstCycle bool, badBlockUnwind bool, s *StageState, _ Unwinder, tx kv.RwTx, logger log.Logger) error { + return FinishForward(s, tx, finish, firstCycle) + }, + Unwind: func(firstCycle bool, u *UnwindState, s *StageState, tx kv.RwTx, logger log.Logger) error { + return UnwindFinish(u, tx, finish, ctx) + }, + Prune: func(firstCycle bool, p *PruneState, tx kv.RwTx, logger log.Logger) error { + return PruneFinish(p, tx, finish, ctx) + }, + }, + } +} + // StateStages are all stages necessary for basic unwind and stage computation, it is primarily used to process side forks and memory execution. func StateStages(ctx context.Context, headers HeadersCfg, bodies BodiesCfg, blockHashCfg BlockHashesCfg, senders SendersCfg, exec ExecuteBlockCfg, hashState HashStateCfg, trieCfg TrieCfg) []*Stage { return []*Stage{ diff --git a/eth/stagedsync/exec3.go b/eth/stagedsync/exec3.go index 3191ccf0b5f..41513721d95 100644 --- a/eth/stagedsync/exec3.go +++ b/eth/stagedsync/exec3.go @@ -29,7 +29,6 @@ import ( "github.com/ledgerwatch/erigon-lib/kv/rawdbv3" "github.com/ledgerwatch/erigon-lib/metrics" libstate "github.com/ledgerwatch/erigon-lib/state" - state2 "github.com/ledgerwatch/erigon-lib/state" "github.com/ledgerwatch/erigon/cmd/state/exec22" "github.com/ledgerwatch/erigon/cmd/state/exec3" "github.com/ledgerwatch/erigon/common/math" @@ -760,7 +759,7 @@ func blockWithSenders(db kv.RoDB, tx kv.Tx, blockReader services.BlockReader, bl return blockReader.BlockByNumber(context.Background(), tx, blockNum) } -func processResultQueue(in *exec22.QueueWithRetry, rws *exec22.ResultsQueue, outputTxNumIn uint64, rs *state.StateV3, agg *state2.AggregatorV3, applyTx kv.Tx, backPressure chan struct{}, applyWorker *exec3.Worker, canRetry, forceStopAtBlockEnd bool) (outputTxNum uint64, conflicts, triggers int, processedBlockNum uint64, stopedAtBlockEnd bool, err error) { +func processResultQueue(in *exec22.QueueWithRetry, rws *exec22.ResultsQueue, outputTxNumIn uint64, rs *state.StateV3, agg *libstate.AggregatorV3, applyTx kv.Tx, backPressure chan struct{}, applyWorker *exec3.Worker, canRetry, forceStopAtBlockEnd bool) (outputTxNum uint64, conflicts, triggers int, processedBlockNum uint64, stopedAtBlockEnd bool, err error) { rwsIt := rws.Iter() defer rwsIt.Close() @@ -1044,7 +1043,7 @@ func reconstituteStep(last bool, return err } if b == nil { - return fmt.Errorf("could not find block %d\n", bn) + return fmt.Errorf("could not find block %d", bn) } txs := b.Transactions() header := b.HeaderNoCopy() @@ -1334,7 +1333,7 @@ func safeCloseTxTaskCh(ch chan *exec22.TxTask) { func ReconstituteState(ctx context.Context, s *StageState, dirs datadir.Dirs, workerCount int, batchSize datasize.ByteSize, chainDb kv.RwDB, blockReader services.FullBlockReader, - logger log.Logger, agg *state2.AggregatorV3, engine consensus.Engine, + logger log.Logger, agg *libstate.AggregatorV3, engine consensus.Engine, chainConfig *chain.Config, genesis *types.Genesis) (err error) 
{ startTime := time.Now() defer agg.EnableMadvNormal().DisableReadAhead() diff --git a/eth/stagedsync/stage_bodies.go b/eth/stagedsync/stage_bodies.go index bf2feb97c9b..62811264323 100644 --- a/eth/stagedsync/stage_bodies.go +++ b/eth/stagedsync/stage_bodies.go @@ -35,6 +35,7 @@ type BodiesCfg struct { blockReader services.FullBlockReader blockWriter *blockio.BlockWriter historyV3 bool + loopBreakCheck func(int) bool } func StageBodiesCfg(db kv.RwDB, bd *bodydownload.BodyDownload, @@ -43,8 +44,12 @@ func StageBodiesCfg(db kv.RwDB, bd *bodydownload.BodyDownload, chanConfig chain.Config, blockReader services.FullBlockReader, historyV3 bool, - blockWriter *blockio.BlockWriter) BodiesCfg { - return BodiesCfg{db: db, bd: bd, bodyReqSend: bodyReqSend, penalise: penalise, blockPropagator: blockPropagator, timeout: timeout, chanConfig: chanConfig, blockReader: blockReader, historyV3: historyV3, blockWriter: blockWriter} + blockWriter *blockio.BlockWriter, + loopBreakCheck func(int) bool) BodiesCfg { + return BodiesCfg{ + db: db, bd: bd, bodyReqSend: bodyReqSend, penalise: penalise, blockPropagator: blockPropagator, + timeout: timeout, chanConfig: chanConfig, blockReader: blockReader, + historyV3: historyV3, blockWriter: blockWriter, loopBreakCheck: loopBreakCheck} } // BodiesForward progresses Bodies stage in the forward direction @@ -59,6 +64,9 @@ func BodiesForward( logger log.Logger, ) error { var doUpdate bool + + startTime := time.Now() + if s.BlockNumber < cfg.blockReader.FrozenBlocks() { s.BlockNumber = cfg.blockReader.FrozenBlocks() doUpdate = true @@ -221,6 +229,10 @@ func BodiesForward( } } cfg.bd.AdvanceLow() + + if cfg.loopBreakCheck != nil && cfg.loopBreakCheck(int(i)) { + return true, nil + } } d5 += time.Since(start) @@ -282,7 +294,10 @@ func BodiesForward( return libcommon.ErrStopped } if bodyProgress > s.BlockNumber+16 { - logger.Info(fmt.Sprintf("[%s] Processed", logPrefix), "highest", bodyProgress) + blocks := bodyProgress - s.BlockNumber + secs := time.Since(startTime).Seconds() + logger.Info(fmt.Sprintf("[%s] Processed", logPrefix), "highest", bodyProgress, + "blocks", blocks, "in", secs, "blk/sec", uint64(float64(blocks)/secs)) } return nil } @@ -304,6 +319,7 @@ func logDownloadingBodies(logPrefix string, committed, remaining uint64, totalDe "wasted/sec", libcommon.ByteCount(uint64(wastedSpeed)), "remaining", remaining, "delivered", totalDelivered, + "blk/sec", totalDelivered/uint64(logInterval/time.Second), "cache", libcommon.ByteCount(uint64(bodyCacheSize)), "alloc", libcommon.ByteCount(m.Alloc), "sys", libcommon.ByteCount(m.Sys), diff --git a/eth/stagedsync/stage_bor_heimdall.go b/eth/stagedsync/stage_bor_heimdall.go index 2b7d8b39e13..4c4548a2e2e 100644 --- a/eth/stagedsync/stage_bor_heimdall.go +++ b/eth/stagedsync/stage_bor_heimdall.go @@ -55,6 +55,7 @@ type BorHeimdallCfg struct { hd *headerdownload.HeaderDownload penalize func(context.Context, []headerdownload.PenaltyItem) stateReceiverABI abi.ABI + loopBreakCheck func(int) bool recents *lru.ARCCache[libcommon.Hash, *bor.Snapshot] signatures *lru.ARCCache[libcommon.Hash, libcommon.Address] } @@ -68,6 +69,7 @@ func StageBorHeimdallCfg( blockReader services.FullBlockReader, hd *headerdownload.HeaderDownload, penalize func(context.Context, []headerdownload.PenaltyItem), + loopBreakCheck func(int) bool, recents *lru.ARCCache[libcommon.Hash, *bor.Snapshot], signatures *lru.ARCCache[libcommon.Hash, libcommon.Address], ) BorHeimdallCfg { @@ -81,6 +83,7 @@ func StageBorHeimdallCfg( hd: hd, penalize: penalize, 
stateReceiverABI: contract.StateReceiver(), + loopBreakCheck: loopBreakCheck, recents: recents, signatures: signatures, } @@ -262,7 +265,7 @@ func BorHeimdallForward( return err } if header == nil { - return fmt.Errorf("["+s.LogPrefix()+"] header not found: %d", blockNum) + return fmt.Errorf("header not found: %d", blockNum) } // Whitelist service is called to check if the bor chain is @@ -274,7 +277,7 @@ func BorHeimdallForward( {Penalty: headerdownload.BadBlockPenalty, PeerID: cfg.hd.SourcePeerId(header.Hash())}}) dataflow.HeaderDownloadStates.AddChange(blockNum, dataflow.HeaderInvalidated) s.state.UnwindTo(blockNum-1, ForkReset(header.Hash())) - return fmt.Errorf("["+s.LogPrefix()+"] verification failed for header %d: %x", blockNum, header.Hash()) + return fmt.Errorf("verification failed for header %d: %x", blockNum, header.Hash()) } } } @@ -293,19 +296,21 @@ func BorHeimdallForward( var snap *bor.Snapshot if header != nil { - snap = loadSnapshot(blockNum, header.Hash(), cfg.chainConfig.Bor, recents, signatures, cfg.snapDb, logger) + if cfg.blockReader.BorSnapshots().SegmentsMin() == 0 { + snap = loadSnapshot(blockNum, header.Hash(), cfg.chainConfig.Bor, recents, signatures, cfg.snapDb, logger) - if snap == nil { - snap, err = initValidatorSets(ctx, tx, cfg.blockReader, cfg.chainConfig.Bor, - chain, blockNum, recents, signatures, cfg.snapDb, logger, s.LogPrefix()) + if snap == nil { + snap, err = initValidatorSets(ctx, tx, cfg.blockReader, cfg.chainConfig.Bor, + cfg.heimdallClient, chain, blockNum, recents, signatures, cfg.snapDb, logger, s.LogPrefix()) - if err != nil { - return fmt.Errorf("can't initialise validator sets: %w", err) + if err != nil { + return fmt.Errorf("can't initialise validator sets: %w", err) + } } - } - if err = persistValidatorSets(ctx, snap, u, tx, cfg.blockReader, cfg.chainConfig.Bor, chain, blockNum, header.Hash(), recents, signatures, cfg.snapDb, logger, s.LogPrefix()); err != nil { - return fmt.Errorf("can't persist validator sets: %w", err) + if err = persistValidatorSets(ctx, snap, u, tx, cfg.blockReader, cfg.chainConfig.Bor, chain, blockNum, header.Hash(), recents, signatures, cfg.snapDb, logger, s.LogPrefix()); err != nil { + return fmt.Errorf("can't persist validator sets: %w", err) + } } if !mine { @@ -318,6 +323,11 @@ func BorHeimdallForward( } } } + + if cfg.loopBreakCheck != nil && cfg.loopBreakCheck(int(blockNum-lastBlockNum)) { + break + } + } if err = s.Update(tx, headNumber); err != nil { @@ -645,9 +655,10 @@ func persistValidatorSets( func initValidatorSets( ctx context.Context, - tx kv.Tx, + tx kv.RwTx, blockReader services.FullBlockReader, config *chain.BorConfig, + heimdallClient heimdall.IHeimdallClient, chain consensus.ChainHeaderReader, blockNum uint64, recents *lru.ARCCache[libcommon.Hash, *bor.Snapshot], @@ -673,8 +684,17 @@ func initValidatorSets( // get validators and current span zeroSpanBytes, err := blockReader.Span(ctx, tx, 0) + if err != nil { - return nil, err + if _, err := fetchAndWriteSpans(ctx, 0, tx, heimdallClient, logPrefix, logger); err != nil { + return nil, err + } + + zeroSpanBytes, err = blockReader.Span(ctx, tx, 0) + + if err != nil { + return nil, err + } } if zeroSpanBytes == nil { diff --git a/eth/stagedsync/stage_headers.go b/eth/stagedsync/stage_headers.go index 6dc0a9a3b27..aa60d348ae7 100644 --- a/eth/stagedsync/stage_headers.go +++ b/eth/stagedsync/stage_headers.go @@ -15,6 +15,8 @@ import ( "github.com/ledgerwatch/erigon-lib/common/hexutility" "github.com/ledgerwatch/erigon-lib/kv" 
"github.com/ledgerwatch/erigon/core/rawdb/blockio" + "github.com/ledgerwatch/erigon/eth/ethconfig" + "github.com/ledgerwatch/erigon/eth/stagedsync/stages" "github.com/ledgerwatch/log/v3" "github.com/ledgerwatch/erigon/common" @@ -49,7 +51,8 @@ type HeadersCfg struct { forkValidator *engine_helpers.ForkValidator notifications *shards.Notifications - loopBreakCheck func() bool + syncConfig ethconfig.Sync + loopBreakCheck func(int) bool } func StageHeadersCfg( @@ -57,6 +60,7 @@ func StageHeadersCfg( headerDownload *headerdownload.HeaderDownload, bodyDownload *bodydownload.BodyDownload, chainConfig chain.Config, + syncConfig ethconfig.Sync, headerReqSend func(context.Context, *headerdownload.HeaderRequest) ([64]byte, bool), announceNewHashes func(context.Context, []headerdownload.Announce), penalize func(context.Context, []headerdownload.PenaltyItem), @@ -67,12 +71,13 @@ func StageHeadersCfg( tmpdir string, notifications *shards.Notifications, forkValidator *engine_helpers.ForkValidator, - loopBreakCheck func() bool) HeadersCfg { + loopBreakCheck func(int) bool) HeadersCfg { return HeadersCfg{ db: db, hd: headerDownload, bodyDownload: bodyDownload, chainConfig: chainConfig, + syncConfig: syncConfig, headerReqSend: headerReqSend, announceNewHashes: announceNewHashes, penalize: penalize, @@ -128,20 +133,21 @@ func HeadersPOW( useExternalTx bool, logger log.Logger, ) error { - var headerProgress uint64 var err error + startTime := time.Now() + if err = cfg.hd.ReadProgressFromDb(tx); err != nil { return err } cfg.hd.SetPOSSync(false) cfg.hd.SetFetchingNew(true) defer cfg.hd.SetFetchingNew(false) - headerProgress = cfg.hd.Progress() + startProgress := cfg.hd.Progress() logPrefix := s.LogPrefix() // Check if this is called straight after the unwinds, which means we need to create new canonical markings - hash, err := cfg.blockReader.CanonicalHash(ctx, tx, headerProgress) + hash, err := cfg.blockReader.CanonicalHash(ctx, tx, startProgress) if err != nil { return err } @@ -149,7 +155,7 @@ func HeadersPOW( defer logEvery.Stop() if hash == (libcommon.Hash{}) { headHash := rawdb.ReadHeadHeaderHash(tx) - if err = fixCanonicalChain(logPrefix, logEvery, headerProgress, headHash, tx, cfg.blockReader, logger); err != nil { + if err = fixCanonicalChain(logPrefix, logEvery, startProgress, headHash, tx, cfg.blockReader, logger); err != nil { return err } if !useExternalTx { @@ -165,21 +171,23 @@ func HeadersPOW( return nil } - logger.Info(fmt.Sprintf("[%s] Waiting for headers...", logPrefix), "from", headerProgress) + logger.Info(fmt.Sprintf("[%s] Waiting for headers...", logPrefix), "from", startProgress) - localTd, err := rawdb.ReadTd(tx, hash, headerProgress) + localTd, err := rawdb.ReadTd(tx, hash, startProgress) if err != nil { return err } + /* TEMP TESTING if localTd == nil { - return fmt.Errorf("localTD is nil: %d, %x", headerProgress, hash) + return fmt.Errorf("localTD is nil: %d, %x", startProgress, hash) } - headerInserter := headerdownload.NewHeaderInserter(logPrefix, localTd, headerProgress, cfg.blockReader) + TEMP TESTING */ + headerInserter := headerdownload.NewHeaderInserter(logPrefix, localTd, startProgress, cfg.blockReader) cfg.hd.SetHeaderReader(&ChainReaderImpl{config: &cfg.chainConfig, tx: tx, blockReader: cfg.blockReader}) stopped := false var noProgressCounter uint = 0 - prevProgress := headerProgress + prevProgress := startProgress var wasProgress bool var lastSkeletonTime time.Time var peer [64]byte @@ -187,14 +195,15 @@ func HeadersPOW( Loop: for !stopped { - transitionedToPoS, err := 
rawdb.Transitioned(tx, headerProgress, cfg.chainConfig.TerminalTotalDifficulty) + transitionedToPoS, err := rawdb.Transitioned(tx, startProgress, cfg.chainConfig.TerminalTotalDifficulty) if err != nil { return err } if transitionedToPoS { - if err := s.Update(tx, headerProgress); err != nil { + if err := s.Update(tx, startProgress); err != nil { return err } + s.state.posTransition = &startProgress break } @@ -241,8 +250,9 @@ Loop: } } // Load headers into the database - var inSync bool - if inSync, err = cfg.hd.InsertHeaders(headerInserter.NewFeedHeaderFunc(tx, cfg.blockReader), cfg.chainConfig.TerminalTotalDifficulty, logPrefix, logEvery.C, uint64(currentTime.Unix())); err != nil { + inSync, err := cfg.hd.InsertHeaders(headerInserter.NewFeedHeaderFunc(tx, cfg.blockReader), cfg.syncConfig.LoopBlockLimit, cfg.chainConfig.TerminalTotalDifficulty, logPrefix, logEvery.C, uint64(currentTime.Unix())) + + if err != nil { return err } @@ -255,7 +265,15 @@ Loop: } } - if cfg.loopBreakCheck != nil && cfg.loopBreakCheck() { + if cfg.syncConfig.LoopBlockLimit > 0 { + if bodyProgress, err := stages.GetStageProgress(tx, stages.Bodies); err == nil { + if cfg.hd.Progress() > bodyProgress && cfg.hd.Progress()-bodyProgress > uint64(cfg.syncConfig.LoopBlockLimit*2) { + break + } + } + } + + if cfg.loopBreakCheck != nil && cfg.loopBreakCheck(int(cfg.hd.Progress()-startProgress)) { break } @@ -324,7 +342,16 @@ Loop: return libcommon.ErrStopped } // We do not print the following line if the stage was interrupted - logger.Info(fmt.Sprintf("[%s] Processed", logPrefix), "highest inserted", headerInserter.GetHighest(), "age", common.PrettyAge(time.Unix(int64(headerInserter.GetHighestTimestamp()), 0))) + + if s.state.posTransition != nil { + logger.Info(fmt.Sprintf("[%s] Transitioned to POS", logPrefix), "block", *s.state.posTransition) + } else { + headers := headerInserter.GetHighest() - startProgress + secs := time.Since(startTime).Seconds() + logger.Info(fmt.Sprintf("[%s] Processed", logPrefix), + "highest", headerInserter.GetHighest(), "age", common.PrettyAge(time.Unix(int64(headerInserter.GetHighestTimestamp()), 0)), + "headers", headers, "in", secs, "blk/sec", uint64(float64(headers)/secs)) + } return nil } diff --git a/eth/stagedsync/stage_interhashes_test.go b/eth/stagedsync/stage_interhashes_test.go index 3bf6c7faac3..107369b1659 100644 --- a/eth/stagedsync/stage_interhashes_test.go +++ b/eth/stagedsync/stage_interhashes_test.go @@ -3,9 +3,10 @@ package stagedsync_test import ( "context" "encoding/binary" - "github.com/ledgerwatch/erigon-lib/kv/dbutils" "testing" + "github.com/ledgerwatch/erigon-lib/kv/dbutils" + libcommon "github.com/ledgerwatch/erigon-lib/common" "github.com/ledgerwatch/erigon-lib/common/hexutility" "github.com/ledgerwatch/erigon-lib/common/length" @@ -80,7 +81,7 @@ func TestAccountAndStorageTrie(t *testing.T) { // ---------------------------------------------------------------- historyV3 := false - blockReader := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", log.New()), freezeblocks.NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", log.New())) + blockReader := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", 1, log.New()), freezeblocks.NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", 1, log.New())) cfg := stagedsync.StageTrieCfg(db, false, true, false, t.TempDir(), blockReader, nil, historyV3, nil) _, err := 
stagedsync.RegenerateIntermediateHashes("IH", tx, cfg, libcommon.Hash{} /* expectedRootHash */, ctx, log.New()) assert.Nil(t, err) @@ -202,7 +203,7 @@ func TestAccountTrieAroundExtensionNode(t *testing.T) { hash6 := libcommon.HexToHash("0x3100000000000000000000000000000000000000000000000000000000000000") assert.Nil(t, tx.Put(kv.HashedAccounts, hash6[:], encoded)) - blockReader := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", log.New()), freezeblocks.NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", log.New())) + blockReader := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", 1, log.New()), freezeblocks.NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", 1, log.New())) _, err := stagedsync.RegenerateIntermediateHashes("IH", tx, stagedsync.StageTrieCfg(db, false, true, false, t.TempDir(), blockReader, nil, historyV3, nil), libcommon.Hash{} /* expectedRootHash */, ctx, log.New()) assert.Nil(t, err) @@ -265,7 +266,7 @@ func TestStorageDeletion(t *testing.T) { // Populate account & storage trie DB tables // ---------------------------------------------------------------- historyV3 := false - blockReader := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", log.New()), freezeblocks.NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", log.New())) + blockReader := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", 1, log.New()), freezeblocks.NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", 1, log.New())) cfg := stagedsync.StageTrieCfg(db, false, true, false, t.TempDir(), blockReader, nil, historyV3, nil) _, err = stagedsync.RegenerateIntermediateHashes("IH", tx, cfg, libcommon.Hash{} /* expectedRootHash */, ctx, log.New()) assert.Nil(t, err) @@ -384,7 +385,7 @@ func TestHiveTrieRoot(t *testing.T) { common.FromHex("02081bc16d674ec80000"))) historyV3 := false - blockReader := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", log.New()), freezeblocks.NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", log.New())) + blockReader := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", 1, log.New()), freezeblocks.NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", 1, log.New())) cfg := stagedsync.StageTrieCfg(db, false, true, false, t.TempDir(), blockReader, nil, historyV3, nil) logger := log.New() _, err := stagedsync.RegenerateIntermediateHashes("IH", tx, cfg, libcommon.Hash{} /* expectedRootHash */, ctx, logger) diff --git a/eth/stagedsync/stage_senders.go b/eth/stagedsync/stage_senders.go index 453562c4e20..3de8e13904b 100644 --- a/eth/stagedsync/stage_senders.go +++ b/eth/stagedsync/stage_senders.go @@ -44,9 +44,10 @@ type SendersCfg struct { chainConfig *chain.Config hd *headerdownload.HeaderDownload blockReader services.FullBlockReader + loopBreakCheck func(int) bool } -func StageSendersCfg(db kv.RwDB, chainCfg *chain.Config, badBlockHalt bool, tmpdir string, prune prune.Mode, blockReader services.FullBlockReader, hd *headerdownload.HeaderDownload) SendersCfg { +func StageSendersCfg(db kv.RwDB, chainCfg *chain.Config, badBlockHalt bool, tmpdir string, prune prune.Mode, blockReader services.FullBlockReader, hd *headerdownload.HeaderDownload, loopBreakCheck func(int) bool) SendersCfg { const sendersBatchSize = 10000 
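// Aside: the loopBreakCheck hooks added to the bodies, senders, headers and
// bor-heimdall stage configs share one shape - func(int) bool - where the int
// is the number of blocks (or entities) processed in the current loop and a
// true result asks the stage to commit and yield. A minimal sketch of such a
// check, assuming a per-cycle block budget (the limit is illustrative, not a
// value taken from this patch):
//
//	func exampleLoopBreakCheck(loopBlockLimit int) func(int) bool {
//		return func(processed int) bool {
//			// yield once this cycle's budget is spent
//			return loopBlockLimit > 0 && processed >= loopBlockLimit
//		}
//	}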
const sendersBlockSize = 4096 @@ -62,8 +63,8 @@ func StageSendersCfg(db kv.RwDB, chainCfg *chain.Config, badBlockHalt bool, tmpd chainConfig: chainCfg, prune: prune, hd: hd, - - blockReader: blockReader, + blockReader: blockReader, + loopBreakCheck: loopBreakCheck, } } @@ -198,6 +199,10 @@ Loop: break } + if cfg.loopBreakCheck != nil && cfg.loopBreakCheck(int(blockNumber-startFrom)) { + break + } + has, err := cfg.blockReader.HasSenders(ctx, tx, blockHash, blockNumber) if err != nil { return err diff --git a/eth/stagedsync/stage_senders_test.go b/eth/stagedsync/stage_senders_test.go index 15bda777a11..bda8d5e90f4 100644 --- a/eth/stagedsync/stage_senders_test.go +++ b/eth/stagedsync/stage_senders_test.go @@ -128,7 +128,7 @@ func TestSenders(t *testing.T) { require.NoError(stages.SaveStageProgress(tx, stages.Bodies, 3)) - cfg := stagedsync.StageSendersCfg(db, params.TestChainConfig, false, "", prune.Mode{}, br, nil) + cfg := stagedsync.StageSendersCfg(db, params.TestChainConfig, false, "", prune.Mode{}, br, nil, nil) err = stagedsync.SpawnRecoverSendersStage(cfg, &stagedsync.StageState{ID: stages.Senders}, nil, tx, 3, m.Ctx, log.New()) require.NoError(err) diff --git a/eth/stagedsync/stage_snapshots.go b/eth/stagedsync/stage_snapshots.go index ca2f9ccb824..7be716c40b5 100644 --- a/eth/stagedsync/stage_snapshots.go +++ b/eth/stagedsync/stage_snapshots.go @@ -1,17 +1,35 @@ package stagedsync import ( + "bufio" + "bytes" "context" "encoding/binary" + "errors" "fmt" + "io/fs" "math/big" + "os" + "path/filepath" "reflect" + "runtime" + "sort" + "strings" + "sync" + "sync/atomic" "time" + "github.com/anacrolix/torrent" "github.com/ledgerwatch/log/v3" + "golang.org/x/sync/errgroup" "github.com/ledgerwatch/erigon-lib/chain" + "github.com/ledgerwatch/erigon-lib/chain/snapcfg" "github.com/ledgerwatch/erigon-lib/common/datadir" + "github.com/ledgerwatch/erigon-lib/common/dbg" + "github.com/ledgerwatch/erigon-lib/common/dir" + "github.com/ledgerwatch/erigon-lib/downloader" + "github.com/ledgerwatch/erigon-lib/downloader/snaptype" "github.com/ledgerwatch/erigon-lib/etl" proto_downloader "github.com/ledgerwatch/erigon-lib/gointerfaces/downloader" "github.com/ledgerwatch/erigon-lib/kv" @@ -21,11 +39,15 @@ import ( "github.com/ledgerwatch/erigon/core/rawdb" "github.com/ledgerwatch/erigon/core/types" + "github.com/ledgerwatch/erigon/eth/ethconfig" "github.com/ledgerwatch/erigon/eth/ethconfig/estimate" "github.com/ledgerwatch/erigon/eth/stagedsync/stages" + "github.com/ledgerwatch/erigon/rpc" "github.com/ledgerwatch/erigon/turbo/services" + "github.com/ledgerwatch/erigon/turbo/shards" "github.com/ledgerwatch/erigon/turbo/silkworm" "github.com/ledgerwatch/erigon/turbo/snapshotsync" + "github.com/ledgerwatch/erigon/turbo/snapshotsync/freezeblocks" ) type SnapshotsCfg struct { @@ -36,39 +58,79 @@ type SnapshotsCfg struct { blockRetire services.BlockRetire snapshotDownloader proto_downloader.DownloaderClient blockReader services.FullBlockReader - dbEventNotifier services.DBEventNotifier + notifier *shards.Notifications - historyV3 bool - caplin bool - agg *state.AggregatorV3 - silkworm *silkworm.Silkworm + historyV3 bool + caplin bool + agg *state.AggregatorV3 + silkworm *silkworm.Silkworm + snapshotUploader *snapshotUploader + syncConfig ethconfig.Sync } func StageSnapshotsCfg(db kv.RwDB, chainConfig chain.Config, + syncConfig ethconfig.Sync, dirs datadir.Dirs, blockRetire services.BlockRetire, snapshotDownloader proto_downloader.DownloaderClient, blockReader services.FullBlockReader, - dbEventNotifier 
services.DBEventNotifier, + notifier *shards.Notifications, historyV3 bool, agg *state.AggregatorV3, caplin bool, silkworm *silkworm.Silkworm, ) SnapshotsCfg { - return SnapshotsCfg{ + cfg := SnapshotsCfg{ db: db, chainConfig: chainConfig, dirs: dirs, blockRetire: blockRetire, snapshotDownloader: snapshotDownloader, blockReader: blockReader, - dbEventNotifier: dbEventNotifier, + notifier: notifier, historyV3: historyV3, caplin: caplin, agg: agg, silkworm: silkworm, + syncConfig: syncConfig, } + + if uploadFs := cfg.syncConfig.UploadLocation; len(uploadFs) > 0 { + + cfg.snapshotUploader = &snapshotUploader{ + cfg: &cfg, + uploadFs: uploadFs, + version: snapcfg.KnownCfg(chainConfig.ChainName, 0).Version, + torrentFiles: downloader.NewAtomicTorrentFiles(cfg.dirs.Snap), + } + + cfg.blockRetire.SetWorkers(estimate.CompressSnapshot.Workers()) + + freezingCfg := cfg.blockReader.FreezingCfg() + + if freezingCfg.Enabled && freezingCfg.Produce { + u := cfg.snapshotUploader + + if maxSeedable := u.maxSeedableHeader(); u.cfg.syncConfig.FrozenBlockLimit > 0 && maxSeedable > u.cfg.syncConfig.FrozenBlockLimit { + blockLimit := maxSeedable - u.minBlockNumber() + + if u.cfg.syncConfig.FrozenBlockLimit < blockLimit { + blockLimit = u.cfg.syncConfig.FrozenBlockLimit + } + + if snapshots, ok := u.cfg.blockReader.Snapshots().(*freezeblocks.RoSnapshots); ok { + snapshots.SetSegmentsMin(maxSeedable - blockLimit) + } + + if snapshots, ok := u.cfg.blockReader.BorSnapshots().(*freezeblocks.BorRoSnapshots); ok { + snapshots.SetSegmentsMin(maxSeedable - blockLimit) + } + } + } + } + + return cfg } func SpawnStageSnapshots( @@ -99,7 +161,12 @@ func SpawnStageSnapshots( if minProgress == 0 || progress < minProgress { minProgress = progress } + + if stage == stages.SyncStage(cfg.syncConfig.BreakAfterStage) { + break + } } + if minProgress > s.BlockNumber { if err = s.Update(tx, minProgress); err != nil { return err @@ -126,21 +193,60 @@ func DownloadAndIndexSnapshotsIfNeed(s *StageState, ctx context.Context, tx kv.R cstate = snapshotsync.AlsoCaplin } - if err := snapshotsync.WaitForDownloader(s.LogPrefix(), ctx, cfg.historyV3, cstate, cfg.agg, tx, cfg.blockReader, &cfg.chainConfig, cfg.snapshotDownloader); err != nil { - return err + if cfg.snapshotUploader != nil { + u := cfg.snapshotUploader + + u.init(ctx, logger) + + if cfg.syncConfig.UploadFrom != rpc.EarliestBlockNumber { + u.downloadLatestSnapshots(ctx, cfg.syncConfig.UploadFrom, u.version) + } + + if maxSeedable := u.maxSeedableHeader(); u.cfg.syncConfig.FrozenBlockLimit > 0 && maxSeedable > u.cfg.syncConfig.FrozenBlockLimit { + blockLimit := maxSeedable - u.minBlockNumber() + + if u.cfg.syncConfig.FrozenBlockLimit < blockLimit { + blockLimit = u.cfg.syncConfig.FrozenBlockLimit + } + + if snapshots, ok := u.cfg.blockReader.Snapshots().(*freezeblocks.RoSnapshots); ok { + snapshots.SetSegmentsMin(maxSeedable - blockLimit) + } + + if snapshots, ok := u.cfg.blockReader.BorSnapshots().(*freezeblocks.BorRoSnapshots); ok { + snapshots.SetSegmentsMin(maxSeedable - blockLimit) + } + } + + if err := cfg.blockReader.Snapshots().ReopenFolder(); err != nil { + return err + } + + if cfg.chainConfig.Bor != nil { + if err := cfg.blockReader.BorSnapshots().ReopenFolder(); err != nil { + return err + } + } + if cfg.notifier.Events != nil { // can notify right here, even that write txn is not commit + cfg.notifier.Events.OnNewSnapshot() + } + } else { + if err := snapshotsync.WaitForDownloader(ctx, s.LogPrefix(), cfg.historyV3, cstate, cfg.agg, tx, cfg.blockReader, 
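// Aside: when Sync.FrozenBlockLimit is set, the uploader clamps how much frozen
// history stays in scope: the window ends at the highest seedable header
// segment and reaches back at most FrozenBlockLimit blocks, and the snapshot
// sets appear to ignore segments below that minimum. The computation above is
// effectively (values illustrative):
//
//	// maxSeedable = 2_500_000, minBlockNumber = 0, FrozenBlockLimit = 1_000_000
//	blockLimit := maxSeedable - minBlockNumber // 2_500_000
//	if frozenBlockLimit < blockLimit {
//		blockLimit = frozenBlockLimit // 1_000_000
//	}
//	segmentsMin := maxSeedable - blockLimit // 1_500_000, passed to SetSegmentsMin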
&cfg.chainConfig, cfg.snapshotDownloader); err != nil { + return err + } } // It's ok to notify before tx.Commit(), because RPCDaemon does read list of files by gRPC (not by reading from db) - if cfg.dbEventNotifier != nil { - cfg.dbEventNotifier.OnNewSnapshot() + if cfg.notifier.Events != nil { + cfg.notifier.Events.OnNewSnapshot() } - cfg.blockReader.Snapshots().LogStat() + cfg.blockReader.Snapshots().LogStat("download") cfg.agg.LogStats(tx, func(endTxNumMinimax uint64) uint64 { _, histBlockNumProgress, _ := rawdbv3.TxNums.FindBlockNum(tx, endTxNumMinimax) return histBlockNumProgress }) - if err := cfg.blockRetire.BuildMissedIndicesIfNeed(ctx, s.LogPrefix(), cfg.dbEventNotifier, &cfg.chainConfig); err != nil { + if err := cfg.blockRetire.BuildMissedIndicesIfNeed(ctx, s.LogPrefix(), cfg.notifier.Events, &cfg.chainConfig); err != nil { return err } @@ -157,8 +263,8 @@ func DownloadAndIndexSnapshotsIfNeed(s *StageState, ctx context.Context, tx kv.R if err := cfg.agg.BuildMissedIndices(ctx, indexWorkers); err != nil { return err } - if cfg.dbEventNotifier != nil { - cfg.dbEventNotifier.OnNewSnapshot() + if cfg.notifier.Events != nil { + cfg.notifier.Events.OnNewSnapshot() } } @@ -173,6 +279,7 @@ func DownloadAndIndexSnapshotsIfNeed(s *StageState, ctx context.Context, tx kv.R if err := FillDBFromSnapshots(s.LogPrefix(), ctx, tx, cfg.dirs, cfg.blockReader, cfg.agg, logger); err != nil { return err } + return nil } @@ -300,7 +407,7 @@ func FillDBFromSnapshots(logPrefix string, ctx context.Context, tx kv.RwTx, dirs /* ====== PRUNING ====== */ // snapshots pruning sections works more as a retiring of blocks // retiring blocks means moving block data from db into snapshots -func SnapshotsPrune(s *PruneState, initialCycle bool, cfg SnapshotsCfg, ctx context.Context, tx kv.RwTx) (err error) { +func SnapshotsPrune(s *PruneState, initialCycle bool, cfg SnapshotsCfg, ctx context.Context, tx kv.RwTx, logger log.Logger) (err error) { useExternalTx := tx != nil if !useExternalTx { tx, err = cfg.db.BeginRw(ctx) @@ -311,32 +418,71 @@ func SnapshotsPrune(s *PruneState, initialCycle bool, cfg SnapshotsCfg, ctx cont } freezingCfg := cfg.blockReader.FreezingCfg() + if freezingCfg.Enabled { - if err := cfg.blockRetire.PruneAncientBlocks(tx, 100); err != nil { - return err - } - } - if freezingCfg.Enabled && freezingCfg.Produce { - //TODO: initialSync maybe save files progress here - if cfg.blockRetire.HasNewFrozenFiles() || cfg.agg.HasNewFrozenFiles() { - if err := rawdb.WriteSnapshots(tx, cfg.blockReader.FrozenFiles(), cfg.agg.Files()); err != nil { - return err + if freezingCfg.Produce { + //TODO: initialSync maybe save files progress here + if cfg.blockRetire.HasNewFrozenFiles() || cfg.agg.HasNewFrozenFiles() { + if err := rawdb.WriteSnapshots(tx, cfg.blockReader.FrozenFiles(), cfg.agg.Files()); err != nil { + return err + } } - } - cfg.blockRetire.RetireBlocksInBackground(ctx, s.ForwardProgress, log.LvlInfo, func(downloadRequest []services.DownloadRequest) error { - if cfg.snapshotDownloader == nil || reflect.ValueOf(cfg.snapshotDownloader).IsNil() { - return nil + var minBlockNumber uint64 + + if cfg.snapshotUploader != nil { + minBlockNumber = cfg.snapshotUploader.minBlockNumber() } - return snapshotsync.RequestSnapshotsDownload(ctx, downloadRequest, cfg.snapshotDownloader) - }, func(l []string) error { - if cfg.snapshotDownloader == nil || reflect.ValueOf(cfg.snapshotDownloader).IsNil() { + + cfg.blockRetire.RetireBlocksInBackground(ctx, minBlockNumber, s.ForwardProgress, log.LvlInfo, func(downloadRequest 
[]services.DownloadRequest) error { + if cfg.snapshotDownloader != nil && !reflect.ValueOf(cfg.snapshotDownloader).IsNil() { + if err := snapshotsync.RequestSnapshotsDownload(ctx, downloadRequest, cfg.snapshotDownloader); err != nil { + return err + } + } + return nil - } - _, err := cfg.snapshotDownloader.Delete(ctx, &proto_downloader.DeleteRequest{Paths: l}) + }, func(l []string) error { + //if cfg.snapshotUploader != nil { + // TODO - we need to also remove files from the uploader (100k->500K transition) + //} + + if !(cfg.snapshotDownloader == nil || reflect.ValueOf(cfg.snapshotDownloader).IsNil()) { + _, err := cfg.snapshotDownloader.Delete(ctx, &proto_downloader.DeleteRequest{Paths: l}) + return err + } + + return nil + }) + + //cfg.agg.BuildFilesInBackground() + } + + if err := cfg.blockRetire.PruneAncientBlocks(tx, cfg.syncConfig.PruneLimit); err != nil { return err - }) - //cfg.agg.BuildFilesInBackground() + } + } + + if cfg.snapshotUploader != nil { + // if we're uploading make sure that the DB does not get too far + // ahead of the snapshot production process - otherwise DB will + // grow larger than necessary - we may also want to increase the + // workers + if s.ForwardProgress > cfg.blockReader.FrozenBlocks()+300_000 { + func() { + checkEvery := time.NewTicker(logInterval) + defer checkEvery.Stop() + + for s.ForwardProgress > cfg.blockReader.FrozenBlocks()+300_000 { + select { + case <-ctx.Done(): + return + case <-checkEvery.C: + log.Info(fmt.Sprintf("[%s] Waiting for snapshots...", s.LogPrefix()), "progress", s.ForwardProgress, "frozen", cfg.blockReader.FrozenBlocks(), "gap", s.ForwardProgress-cfg.blockReader.FrozenBlocks()) + } + } + }() + } } if !useExternalTx { @@ -347,3 +493,779 @@ func SnapshotsPrune(s *PruneState, initialCycle bool, cfg SnapshotsCfg, ctx cont return nil } + +type uploadState struct { + sync.Mutex + file string + info *snaptype.FileInfo + torrent *torrent.TorrentSpec + buildingTorrent bool + uploads []string + remote bool + hasRemoteTorrent bool + //remoteHash string + local bool + localHash string +} + +type snapshotUploader struct { + cfg *SnapshotsCfg + files map[string]*uploadState + uploadFs string + rclone *downloader.RCloneClient + uploadSession *downloader.RCloneSession + uploadScheduled atomic.Bool + uploading atomic.Bool + manifestMutex sync.Mutex + version uint8 + torrentFiles *downloader.TorrentFiles +} + +func (u *snapshotUploader) init(ctx context.Context, logger log.Logger) { + if u.files == nil { + freezingCfg := u.cfg.blockReader.FreezingCfg() + + if freezingCfg.Enabled && freezingCfg.Produce { + u.files = map[string]*uploadState{} + u.start(ctx, logger) + } + } +} + +func (u *snapshotUploader) maxUploadedHeader() uint64 { + var max uint64 + + if len(u.files) > 0 { + for _, state := range u.files { + if state.local && state.remote { + if state.info != nil { + if state.info.T == snaptype.Headers { + if state.info.To > max { + max = state.info.To + } + } + } else { + if info, ok := snaptype.ParseFileName(u.cfg.dirs.Snap, state.file); ok { + if info.T == snaptype.Headers { + if info.To > max { + max = info.To + } + } + state.info = &info + } + } + } + } + } + + return max +} + +type dirEntry struct { + name string +} + +func (e dirEntry) Name() string { + return e.name +} + +func (e dirEntry) IsDir() bool { + return false +} + +func (e dirEntry) Type() fs.FileMode { + return e.Mode() +} + +func (e dirEntry) Size() int64 { + return -1 +} + +func (e dirEntry) Mode() fs.FileMode { + return fs.ModeIrregular +} + +func (e dirEntry) ModTime() 
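// Aside: dirEntry is a minimal fs.DirEntry / fs.FileInfo shim for names read
// back from manifest.txt, where only the file name is known: Size() reports -1
// and Mode() reports fs.ModeIrregular to flag the missing metadata. Callers can
// use that to tell manifest-derived entries from real stat results, e.g.:
//
//	if info, err := entry.Info(); err == nil && info.Size() < 0 {
//		// name-only entry from the manifest; no size or mod-time available
//	}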
time.Time { + return time.Time{} +} + +func (e dirEntry) Sys() any { + return nil +} + +func (e dirEntry) Info() (fs.FileInfo, error) { + return e, nil +} + +var checkKnownSizes = false + +func (u *snapshotUploader) seedable(fi snaptype.FileInfo) bool { + if !fi.Seedable() { + return false + } + + if checkKnownSizes { + for _, it := range snapcfg.KnownCfg(u.cfg.chainConfig.ChainName, 1).Preverified { + info, _ := snaptype.ParseFileName("", it.Name) + + if fi.From == info.From { + return fi.To == info.To + } + + if fi.From < info.From { + return info.To-info.From == fi.To-fi.From + } + + if fi.From < info.To { + return false + } + } + } + + return true +} + +func (u *snapshotUploader) downloadManifest(ctx context.Context) ([]fs.DirEntry, error) { + u.manifestMutex.Lock() + defer u.manifestMutex.Unlock() + + reader, err := u.uploadSession.Cat(ctx, "manifest.txt") + + if err != nil { + return nil, err + } + + var entries []fs.DirEntry + + scanner := bufio.NewScanner(reader) + + for scanner.Scan() { + entries = append(entries, dirEntry{scanner.Text()}) + } + + if err := scanner.Err(); err != nil { + return nil, err + } + + return entries, nil +} + +func (u *snapshotUploader) uploadManifest(ctx context.Context, remoteRefresh bool) error { + u.manifestMutex.Lock() + defer u.manifestMutex.Unlock() + + if remoteRefresh { + u.refreshFromRemote(ctx) + } + + manifestFile := "manifest.txt" + + fileMap := map[string]string{} + + for file, state := range u.files { + if state.remote { + if state.hasRemoteTorrent { + fileMap[file] = file + ".torrent" + } else { + fileMap[file] = "" + } + } + } + + files := make([]string, 0, len(fileMap)) + + for torrent, file := range fileMap { + files = append(files, file) + + if len(torrent) > 0 { + files = append(files, torrent) + } + } + + sort.Strings(files) + + manifestEntries := bytes.Buffer{} + + for _, file := range files { + fmt.Fprintln(&manifestEntries, file) + } + + _ = os.WriteFile(filepath.Join(u.cfg.dirs.Snap, manifestFile), manifestEntries.Bytes(), 0644) + defer os.Remove(filepath.Join(u.cfg.dirs.Snap, manifestFile)) + + return u.uploadSession.Upload(ctx, manifestFile) +} + +func (u *snapshotUploader) refreshFromRemote(ctx context.Context) { + remoteFiles, err := u.uploadSession.ReadRemoteDir(ctx, true) + + if err != nil { + return + } + + u.updateRemotes(remoteFiles) +} + +func (u *snapshotUploader) updateRemotes(remoteFiles []fs.DirEntry) { + for _, fi := range remoteFiles { + var file string + var hasTorrent bool + + if hasTorrent = filepath.Ext(fi.Name()) == ".torrent"; hasTorrent { + file = strings.TrimSuffix(fi.Name(), ".torrent") + } else { + file = fi.Name() + } + + // if we have found the file & its torrent we don't + // need to attempt another sync operation + if state, ok := u.files[file]; ok { + state.remote = true + + if hasTorrent { + state.hasRemoteTorrent = true + } + + } else { + info, ok := snaptype.ParseFileName(u.cfg.dirs.Snap, fi.Name()) + + if !ok || info.Version != u.version { + continue + } + + u.files[file] = &uploadState{ + file: file, + info: &info, + local: dir.FileNonZero(info.Path), + hasRemoteTorrent: hasTorrent, + } + } + } +} + +func (u *snapshotUploader) downloadLatestSnapshots(ctx context.Context, blockNumber rpc.BlockNumber, version uint8) error { + + entries, err := u.downloadManifest(ctx) + + if err != nil { + entries, err = u.uploadSession.ReadRemoteDir(ctx, true) + } + + if err != nil { + return err + } + + lastSegments := map[snaptype.Type]fs.FileInfo{} + torrents := map[string]string{} + + for _, ent := range 
entries { + if info, err := ent.Info(); err == nil { + + if info.Size() > -1 && info.Size() <= 32 { + continue + } + + snapInfo, ok := info.Sys().(downloader.SnapInfo) + + if ok && snapInfo.Type() != snaptype.Unknown && snapInfo.Version() == version { + if last, ok := lastSegments[snapInfo.Type()]; ok { + if lastInfo, ok := last.Sys().(downloader.SnapInfo); ok && snapInfo.To() > lastInfo.To() { + lastSegments[snapInfo.Type()] = info + } + } else { + lastSegments[snapInfo.Type()] = info + } + } else { + if ext := filepath.Ext(info.Name()); ext == ".torrent" { + fileName := strings.TrimSuffix(info.Name(), ".torrent") + torrents[fileName] = info.Name() + } + } + } + } + + var min uint64 + + for _, info := range lastSegments { + if lastInfo, ok := info.Sys().(downloader.SnapInfo); ok { + if min == 0 || lastInfo.From() < min { + min = lastInfo.From() + } + } + } + + for segType, info := range lastSegments { + if lastInfo, ok := info.Sys().(downloader.SnapInfo); ok { + if lastInfo.From() > min { + for _, ent := range entries { + if info, err := ent.Info(); err == nil { + snapInfo, ok := info.Sys().(downloader.SnapInfo) + + if ok && snapInfo.Type() == segType && + snapInfo.Version() == version && + snapInfo.From() == min { + lastSegments[segType] = info + } + } + } + } + } + } + + downloads := make([]string, 0, len(lastSegments)) + + for _, info := range lastSegments { + downloads = append(downloads, info.Name()) + if torrent, ok := torrents[info.Name()]; ok { + downloads = append(downloads, torrent) + } + } + + if len(downloads) > 0 { + return u.uploadSession.Download(ctx, downloads...) + } + + return nil +} + +func (u *snapshotUploader) maxSeedableHeader() uint64 { + var max uint64 + + if list, err := snaptype.Segments(u.cfg.dirs.Snap, u.version); err == nil { + for _, info := range list { + if u.seedable(info) && info.T == snaptype.Headers && info.To > max { + max = info.To + } + } + } + + return max +} + +func (u *snapshotUploader) minBlockNumber() uint64 { + var min uint64 + + if list, err := snaptype.Segments(u.cfg.dirs.Snap, u.version); err == nil { + for _, info := range list { + if u.seedable(info) && min == 0 || info.From < min { + min = info.From + } + } + } + + return min +} + +func expandHomeDir(dirpath string) string { + home, err := os.UserHomeDir() + if err != nil { + return dirpath + } + prefix := fmt.Sprintf("~%c", os.PathSeparator) + if strings.HasPrefix(dirpath, prefix) { + return filepath.Join(home, dirpath[len(prefix):]) + } else if dirpath == "~" { + return home + } + return dirpath +} + +func isLocalFs(ctx context.Context, rclient *downloader.RCloneClient, fs string) bool { + + remotes, _ := rclient.ListRemotes(ctx) + + if remote, _, ok := strings.Cut(fs, ":"); ok { + for _, r := range remotes { + if remote == r { + return false + } + } + + return filepath.VolumeName(fs) == remote + } + + return true +} + +func (u *snapshotUploader) start(ctx context.Context, logger log.Logger) { + var err error + + u.rclone, err = downloader.NewRCloneClient(logger) + + if err != nil { + logger.Warn("[uploader] Uploading disabled: rclone start failed", "err", err) + return + } + + uploadFs := u.uploadFs + + if isLocalFs(ctx, u.rclone, uploadFs) { + uploadFs = expandHomeDir(filepath.Clean(uploadFs)) + + uploadFs, err = filepath.Abs(uploadFs) + + if err != nil { + logger.Warn("[uploader] Uploading disabled: invalid upload fs", "err", err, "fs", u.uploadFs) + return + } + + if err := os.MkdirAll(uploadFs, 0755); err != nil { + logger.Warn("[uploader] Uploading disabled: can't create upload 
fs", "err", err, "fs", u.uploadFs) + return + } + } + + u.uploadSession, err = u.rclone.NewSession(ctx, u.cfg.dirs.Snap, uploadFs) + + if err != nil { + logger.Warn("[uploader] Uploading disabled: rclone session failed", "err", err) + return + } + + go func() { + + remoteFiles, _ := u.downloadManifest(ctx) + refreshFromRemote := false + + if len(remoteFiles) > 0 { + u.updateRemotes(remoteFiles) + refreshFromRemote = true + } else { + u.refreshFromRemote(ctx) + } + + go u.uploadManifest(ctx, refreshFromRemote) + + logger.Debug("[snapshot uploader] starting snapshot subscription...") + snapshotSubCh, snapshotSubClean := u.cfg.notifier.Events.AddNewSnapshotSubscription() + defer snapshotSubClean() + + logger.Info("[snapshot uploader] subscription established") + + defer func() { + if err != nil { + if !errors.Is(err, context.Canceled) { + logger.Warn("[snapshot uploader] subscription closed", "reason", err) + } + } else { + logger.Warn("[snapshot uploader] subscription closed") + } + }() + + u.scheduleUpload(ctx, logger) + + for { + select { + case <-ctx.Done(): + err = ctx.Err() + return + case <-snapshotSubCh: + logger.Info("[snapshot uploader] new snapshot received") + u.scheduleUpload(ctx, logger) + } + } + }() +} + +func (u *snapshotUploader) scheduleUpload(ctx context.Context, logger log.Logger) { + if !u.uploadScheduled.CompareAndSwap(false, true) { + return + } + + if u.uploading.CompareAndSwap(false, true) { + go func() { + defer u.uploading.Store(false) + for u.uploadScheduled.Load() { + u.uploadScheduled.Store(false) + u.upload(ctx, logger) + } + }() + } +} + +func (u *snapshotUploader) removeBefore(before uint64) { + list, err := snaptype.Segments(u.cfg.dirs.Snap, u.version) + + if err != nil { + return + } + + var toReopen []string + var borToReopen []string + + var toRemove []string //nolint:prealloc + + for _, f := range list { + if f.To > before { + switch f.T { + case snaptype.BorEvents, snaptype.BorSpans: + borToReopen = append(borToReopen, filepath.Base(f.Path)) + default: + toReopen = append(toReopen, filepath.Base(f.Path)) + } + + continue + } + + toRemove = append(toRemove, f.Path) + } + + if len(toRemove) > 0 { + if snapshots, ok := u.cfg.blockReader.Snapshots().(*freezeblocks.RoSnapshots); ok { + snapshots.SetSegmentsMin(before) + snapshots.ReopenList(toReopen, true) + } + + if snapshots, ok := u.cfg.blockReader.BorSnapshots().(*freezeblocks.BorRoSnapshots); ok { + snapshots.ReopenList(borToReopen, true) + snapshots.SetSegmentsMin(before) + } + + for _, f := range toRemove { + _ = os.Remove(f) + _ = os.Remove(f + ".torrent") + ext := filepath.Ext(f) + withoutExt := f[:len(f)-len(ext)] + _ = os.Remove(withoutExt + ".idx") + + if strings.HasSuffix(withoutExt, "transactions") { + _ = os.Remove(withoutExt + "-to-block.idx") + } + } + } +} + +func (u *snapshotUploader) upload(ctx context.Context, logger log.Logger) { + defer func() { + if r := recover(); r != nil { + log.Error("[snapshot uploader] snapshot upload failed", "err", r, "stack", dbg.Stack()) + } + }() + + retryTime := 30 * time.Second + maxRetryTime := 300 * time.Second + + var uploadCount int + + for { + var processList []*uploadState + + for _, f := range u.cfg.blockReader.FrozenFiles() { + if state, ok := u.files[f]; !ok { + if fi, ok := snaptype.ParseFileName(u.cfg.dirs.Snap, f); ok { + if u.seedable(fi) { + state := &uploadState{ + file: f, + info: &fi, + local: true, + } + + if fi.TorrentFileExists() { + state.torrent, _ = u.torrentFiles.LoadByName(f) + } + + u.files[f] = state + processList = 
append(processList, state) + } + } + } else { + func() { + state.Lock() + defer state.Unlock() + + state.local = true + + if state.torrent == nil && state.info.TorrentFileExists() { + state.torrent, _ = u.torrentFiles.LoadByName(f) + if state.torrent != nil { + state.localHash = state.torrent.InfoHash.String() + } + } + + if !state.remote { + processList = append(processList, state) + } + }() + } + } + + var torrentList []*uploadState + + for _, state := range processList { + func() { + state.Lock() + defer state.Unlock() + if !(state.torrent != nil || state.buildingTorrent) { + torrentList = append(torrentList, state) + state.buildingTorrent = true + } + }() + } + + if len(torrentList) > 0 { + g, gctx := errgroup.WithContext(ctx) + g.SetLimit(runtime.GOMAXPROCS(-1) * 4) + var i atomic.Int32 + + go func() { + logEvery := time.NewTicker(20 * time.Second) + defer logEvery.Stop() + + for int(i.Load()) < len(torrentList) { + select { + case <-gctx.Done(): + return + case <-logEvery.C: + if int(i.Load()) == len(torrentList) { + return + } + log.Info("[snapshot uploader] Creating .torrent files", "progress", fmt.Sprintf("%d/%d", i.Load(), len(torrentList))) + } + } + }() + + for _, s := range torrentList { + state := s + + g.Go(func() error { + defer i.Add(1) + + err := downloader.BuildTorrentIfNeed(gctx, state.file, u.cfg.dirs.Snap, u.torrentFiles) + + state.Lock() + state.buildingTorrent = false + state.Unlock() + + if err != nil { + return err + } + + torrent, err := u.torrentFiles.LoadByName(state.file) + + if err != nil { + return err + } + + state.Lock() + state.torrent = torrent + state.Unlock() + + state.localHash = state.torrent.InfoHash.String() + + logger.Info("[snapshot uploader] built torrent", "file", state.file, "hash", state.localHash) + + return nil + }) + } + + if err := g.Wait(); err != nil { + logger.Debug(".torrent file creation failed", "err", err) + } + } + + var f atomic.Int32 + + var uploadList []*uploadState + + for _, state := range processList { + err := func() error { + state.Lock() + defer state.Unlock() + if !state.remote && state.torrent != nil && len(state.uploads) == 0 && u.rclone != nil { + state.uploads = []string{state.file, state.file + ".torrent"} + uploadList = append(uploadList, state) + } + + return nil + }() + + if err != nil { + logger.Debug("upload failed", "file", state.file, "err", err) + } + } + + if len(uploadList) > 0 { + log.Info("[snapshot uploader] Starting upload", "count", len(uploadList)) + + g, gctx := errgroup.WithContext(ctx) + g.SetLimit(16) + var i atomic.Int32 + + go func() { + logEvery := time.NewTicker(20 * time.Second) + defer logEvery.Stop() + + for int(i.Load()) < len(processList) { + select { + case <-gctx.Done(): + log.Info("[snapshot uploader] Uploaded files", "processed", fmt.Sprintf("%d/%d/%d", i.Load(), len(processList), f.Load())) + return + case <-logEvery.C: + if int(i.Load()+f.Load()) == len(processList) { + return + } + log.Info("[snapshot uploader] Uploading files", "progress", fmt.Sprintf("%d/%d/%d", i.Load(), len(processList), f.Load())) + } + } + }() + + for _, s := range uploadList { + state := s + func() { + state.Lock() + defer state.Unlock() + + g.Go(func() error { + defer i.Add(1) + defer func() { + state.Lock() + state.uploads = nil + state.Unlock() + }() + + if err := u.uploadSession.Upload(gctx, state.uploads...); err != nil { + f.Add(1) + return nil + } + + uploadCount++ + + state.Lock() + state.remote = true + state.hasRemoteTorrent = true + state.Unlock() + return nil + }) + }() + } + + if err := 
g.Wait(); err != nil { + logger.Debug("[snapshot uploader] upload failed", "err", err) + } + } + + if f.Load() == 0 { + break + } + + time.Sleep(retryTime) + + if retryTime < maxRetryTime { + retryTime += retryTime + } else { + retryTime = maxRetryTime + } + } + + var err error + + if uploadCount > 0 { + err = u.uploadManifest(ctx, false) + } + + if err == nil { + if maxUploaded := u.maxUploadedHeader(); u.cfg.syncConfig.FrozenBlockLimit > 0 && maxUploaded > u.cfg.syncConfig.FrozenBlockLimit { + u.removeBefore(maxUploaded - u.cfg.syncConfig.FrozenBlockLimit) + } + } +} diff --git a/eth/stagedsync/sync.go b/eth/stagedsync/sync.go index 1112fad19bf..fd3782482cc 100644 --- a/eth/stagedsync/sync.go +++ b/eth/stagedsync/sync.go @@ -10,13 +10,16 @@ import ( "github.com/ledgerwatch/erigon-lib/kv" "github.com/ledgerwatch/log/v3" + "github.com/ledgerwatch/erigon/eth/ethconfig" "github.com/ledgerwatch/erigon/eth/stagedsync/stages" ) type Sync struct { + cfg ethconfig.Sync unwindPoint *uint64 // used to run stages prevUnwindPoint *uint64 // used to get value from outside of staged sync after cycle (for example to notify RPCDaemon) unwindReason UnwindReason + posTransition *uint64 stages []*Stage unwindOrder []*Stage @@ -138,7 +141,7 @@ func (s *Sync) SetCurrentStage(id stages.SyncStage) error { return fmt.Errorf("stage not found with id: %v", id) } -func New(stagesList []*Stage, unwindOrder UnwindOrder, pruneOrder PruneOrder, logger log.Logger) *Sync { +func New(cfg ethconfig.Sync, stagesList []*Stage, unwindOrder UnwindOrder, pruneOrder PruneOrder, logger log.Logger) *Sync { unwindStages := make([]*Stage, len(stagesList)) for i, stageIndex := range unwindOrder { for _, s := range stagesList { @@ -163,6 +166,7 @@ func New(stagesList []*Stage, unwindOrder UnwindOrder, pruneOrder PruneOrder, lo } return &Sync{ + cfg: cfg, stages: stagesList, currentStage: 0, unwindOrder: unwindStages, @@ -269,6 +273,11 @@ func (s *Sync) RunNoInterrupt(db kv.RwDB, tx kv.RwTx, firstCycle bool) error { return libcommon.ErrStopped } + if string(stage.ID) == s.cfg.BreakAfterStage { // break process loop + s.logger.Warn("--sync.loop.break caused stage break") + break + } + s.NextStage() } @@ -280,10 +289,12 @@ func (s *Sync) RunNoInterrupt(db kv.RwDB, tx kv.RwTx, firstCycle bool) error { return nil } -func (s *Sync) Run(db kv.RwDB, tx kv.RwTx, firstCycle bool) error { +func (s *Sync) Run(db kv.RwDB, tx kv.RwTx, firstCycle bool) (bool, error) { s.prevUnwindPoint = nil s.timings = s.timings[:0] + hasMore := false + for !s.IsDone() { var badBlockUnwind bool if s.unwindPoint != nil { @@ -292,7 +303,7 @@ func (s *Sync) Run(db kv.RwDB, tx kv.RwTx, firstCycle bool) error { continue } if err := s.unwindStage(firstCycle, s.unwindOrder[j], db, tx); err != nil { - return err + return false, err } } s.prevUnwindPoint = s.unwindPoint @@ -302,7 +313,7 @@ func (s *Sync) Run(db kv.RwDB, tx kv.RwTx, firstCycle bool) error { } s.unwindReason = UnwindReason{} if err := s.SetCurrentStage(s.stages[0].ID); err != nil { - return err + return false, err } // If there were unwinds at the start, a heavier but invalid chain may be present, so // we relax the rules for Stage1 @@ -318,7 +329,7 @@ func (s *Sync) Run(db kv.RwDB, tx kv.RwTx, firstCycle bool) error { if string(stage.ID) == dbg.StopBeforeStage() { // stop process for debugging reasons s.logger.Warn("STOP_BEFORE_STAGE env flag forced to stop app") - return libcommon.ErrStopped + return false, libcommon.ErrStopped } if stage.Disabled || stage.Forward == nil { @@ -329,23 +340,46 @@ func (s 
*Sync) Run(db kv.RwDB, tx kv.RwTx, firstCycle bool) error { } if err := s.runStage(stage, db, tx, firstCycle, badBlockUnwind); err != nil { - return err + return false, err } if string(stage.ID) == dbg.StopAfterStage() { // stop process for debugging reasons s.logger.Warn("STOP_AFTER_STAGE env flag forced to stop app") - return libcommon.ErrStopped + return false, libcommon.ErrStopped + } + + if string(stage.ID) == s.cfg.BreakAfterStage { // break process loop + s.logger.Warn("--sync.loop.break caused stage break") + if s.posTransition != nil { + ptx := tx + + if ptx == nil { + if tx, err := db.BeginRw(context.Background()); err == nil { + ptx = tx + defer tx.Rollback() + } + } + + if ptx != nil { + if progress, err := stages.GetStageProgress(ptx, stage.ID); err == nil { + hasMore = progress < *s.posTransition + } + } + } else { + hasMore = true + } + break } s.NextStage() } if err := s.SetCurrentStage(s.stages[0].ID); err != nil { - return err + return false, err } s.currentStage = 0 - return nil + return hasMore, nil } func (s *Sync) RunPrune(db kv.RwDB, tx kv.RwTx, firstCycle bool) error { diff --git a/eth/stagedsync/sync_test.go b/eth/stagedsync/sync_test.go index 8992c31c5a0..48a6c475478 100644 --- a/eth/stagedsync/sync_test.go +++ b/eth/stagedsync/sync_test.go @@ -10,6 +10,7 @@ import ( "github.com/ledgerwatch/log/v3" "github.com/stretchr/testify/assert" + "github.com/ledgerwatch/erigon/eth/ethconfig" "github.com/ledgerwatch/erigon/eth/stagedsync/stages" ) @@ -41,9 +42,9 @@ func TestStagesSuccess(t *testing.T) { }, }, } - state := New(s, nil, nil, log.New()) + state := New(ethconfig.Defaults.Sync, s, nil, nil, log.New()) db, tx := memdb.NewTestTx(t) - err := state.Run(db, tx, true /* initialCycle */) + _, err := state.Run(db, tx, true /* initialCycle */) assert.NoError(t, err) expectedFlow := []stages.SyncStage{ @@ -81,9 +82,9 @@ func TestDisabledStages(t *testing.T) { }, }, } - state := New(s, nil, nil, log.New()) + state := New(ethconfig.Defaults.Sync, s, nil, nil, log.New()) db, tx := memdb.NewTestTx(t) - err := state.Run(db, tx, true /* initialCycle */) + _, err := state.Run(db, tx, true /* initialCycle */) assert.NoError(t, err) expectedFlow := []stages.SyncStage{ @@ -121,9 +122,9 @@ func TestErroredStage(t *testing.T) { }, }, } - state := New(s, []stages.SyncStage{s[2].ID, s[1].ID, s[0].ID}, nil, log.New()) + state := New(ethconfig.Defaults.Sync, s, []stages.SyncStage{s[2].ID, s[1].ID, s[0].ID}, nil, log.New()) db, tx := memdb.NewTestTx(t) - err := state.Run(db, tx, true /* initialCycle */) + _, err := state.Run(db, tx, true /* initialCycle */) assert.Equal(t, fmt.Errorf("[2/3 Bodies] %w", expectedErr), err) expectedFlow := []stages.SyncStage{ @@ -204,9 +205,9 @@ func TestUnwindSomeStagesBehindUnwindPoint(t *testing.T) { }, }, } - state := New(s, []stages.SyncStage{s[3].ID, s[2].ID, s[1].ID, s[0].ID}, nil, log.New()) + state := New(ethconfig.Defaults.Sync, s, []stages.SyncStage{s[3].ID, s[2].ID, s[1].ID, s[0].ID}, nil, log.New()) db, tx := memdb.NewTestTx(t) - err := state.Run(db, tx, true /* initialCycle */) + _, err := state.Run(db, tx, true /* initialCycle */) assert.NoError(t, err) expectedFlow := []stages.SyncStage{ @@ -297,9 +298,9 @@ func TestUnwind(t *testing.T) { }, }, } - state := New(s, []stages.SyncStage{s[3].ID, s[2].ID, s[1].ID, s[0].ID}, nil, log.New()) + state := New(ethconfig.Defaults.Sync, s, []stages.SyncStage{s[3].ID, s[2].ID, s[1].ID, s[0].ID}, nil, log.New()) db, tx := memdb.NewTestTx(t) - err := state.Run(db, tx, true /* initialCycle */) + _, err := 
state.Run(db, tx, true /* initialCycle */) assert.NoError(t, err) expectedFlow := []stages.SyncStage{ @@ -326,7 +327,7 @@ func TestUnwind(t *testing.T) { flow = flow[:0] state.unwindOrder = []*Stage{s[3], s[2], s[1], s[0]} state.UnwindTo(100, UnwindReason{}) - err = state.Run(db, tx, true /* initialCycle */) + _, err = state.Run(db, tx, true /* initialCycle */) assert.NoError(t, err) expectedFlow = []stages.SyncStage{ @@ -386,9 +387,9 @@ func TestUnwindEmptyUnwinder(t *testing.T) { }, }, } - state := New(s, []stages.SyncStage{s[2].ID, s[1].ID, s[0].ID}, nil, log.New()) + state := New(ethconfig.Defaults.Sync, s, []stages.SyncStage{s[2].ID, s[1].ID, s[0].ID}, nil, log.New()) db, tx := memdb.NewTestTx(t) - err := state.Run(db, tx, true /* initialCycle */) + _, err := state.Run(db, tx, true /* initialCycle */) assert.NoError(t, err) expectedFlow := []stages.SyncStage{ @@ -442,13 +443,13 @@ func TestSyncDoTwice(t *testing.T) { }, } - state := New(s, nil, nil, log.New()) + state := New(ethconfig.Defaults.Sync, s, nil, nil, log.New()) db, tx := memdb.NewTestTx(t) - err := state.Run(db, tx, true /* initialCycle */) + _, err := state.Run(db, tx, true /* initialCycle */) assert.NoError(t, err) - state = New(s, nil, nil, log.New()) - err = state.Run(db, tx, true /* initialCycle */) + state = New(ethconfig.Defaults.Sync, s, nil, nil, log.New()) + _, err = state.Run(db, tx, true /* initialCycle */) assert.NoError(t, err) expectedFlow := []stages.SyncStage{ @@ -500,15 +501,15 @@ func TestStateSyncInterruptRestart(t *testing.T) { }, } - state := New(s, nil, nil, log.New()) + state := New(ethconfig.Defaults.Sync, s, nil, nil, log.New()) db, tx := memdb.NewTestTx(t) - err := state.Run(db, tx, true /* initialCycle */) + _, err := state.Run(db, tx, true /* initialCycle */) assert.Equal(t, fmt.Errorf("[2/3 Bodies] %w", expectedErr), err) expectedErr = nil - state = New(s, nil, nil, log.New()) - err = state.Run(db, tx, true /* initialCycle */) + state = New(ethconfig.Defaults.Sync, s, nil, nil, log.New()) + _, err = state.Run(db, tx, true /* initialCycle */) assert.NoError(t, err) expectedFlow := []stages.SyncStage{ @@ -579,9 +580,9 @@ func TestSyncInterruptLongUnwind(t *testing.T) { }, }, } - state := New(s, []stages.SyncStage{s[2].ID, s[1].ID, s[0].ID}, nil, log.New()) + state := New(ethconfig.Defaults.Sync, s, []stages.SyncStage{s[2].ID, s[1].ID, s[0].ID}, nil, log.New()) db, tx := memdb.NewTestTx(t) - err := state.Run(db, tx, true /* initialCycle */) + _, err := state.Run(db, tx, true /* initialCycle */) assert.Error(t, errInterrupted, err) //state = NewState(s) @@ -589,7 +590,7 @@ func TestSyncInterruptLongUnwind(t *testing.T) { //err = state.LoadUnwindInfo(tx) //assert.NoError(t, err) //state.UnwindTo(500, libcommon.Hash{}) - err = state.Run(db, tx, true /* initialCycle */) + _, err = state.Run(db, tx, true /* initialCycle */) assert.NoError(t, err) expectedFlow := []stages.SyncStage{ diff --git a/eth/stagedsync/test/harness.go b/eth/stagedsync/test/harness.go index ea40c226d93..043f4b24542 100644 --- a/eth/stagedsync/test/harness.go +++ b/eth/stagedsync/test/harness.go @@ -58,6 +58,7 @@ func InitHarness(ctx context.Context, t *testing.T, logger log.Logger, cfg Harne nil, // penalize nil, // not used nil, // not used + nil, ) stateSyncStages := stagedsync.DefaultStages( ctx, @@ -77,7 +78,7 @@ func InitHarness(ctx context.Context, t *testing.T, logger log.Logger, cfg Harne stagedsync.FinishCfg{}, true, ) - stateSync := stagedsync.New(stateSyncStages, stagedsync.DefaultUnwindOrder, 
stagedsync.DefaultPruneOrder, logger) + stateSync := stagedsync.New(ethconfig.Defaults.Sync, stateSyncStages, stagedsync.DefaultUnwindOrder, stagedsync.DefaultPruneOrder, logger) validatorKey, err := crypto.GenerateKey() require.NoError(t, err) validatorAddress := crypto.PubkeyToAddress(validatorKey.PublicKey) @@ -380,7 +381,7 @@ func (h *Harness) consensusEngine(t *testing.T, cfg HarnessCfg) consensus.Engine return borConsensusEng } - t.Fatal(fmt.Sprintf("unimplmented consensus engine init for cfg %v", cfg.ChainConfig)) + t.Fatalf("unimplmented consensus engine init for cfg %v", cfg.ChainConfig) return nil } diff --git a/p2p/sentry/simulator/sentry_simulator.go b/p2p/sentry/simulator/sentry_simulator.go index ca7dce37979..51eb8c2de68 100644 --- a/p2p/sentry/simulator/sentry_simulator.go +++ b/p2p/sentry/simulator/sentry_simulator.go @@ -32,11 +32,10 @@ type server struct { peers map[[64]byte]*p2p.Peer messageReceivers map[sentry_if.MessageId][]sentry_if.Sentry_MessagesServer logger log.Logger - //snapshotVersion uint8 - knownSnapshots *freezeblocks.RoSnapshots - activeSnapshots *freezeblocks.RoSnapshots - blockReader *freezeblocks.BlockReader - downloader *TorrentClient + knownSnapshots *freezeblocks.RoSnapshots + activeSnapshots *freezeblocks.RoSnapshots + blockReader *freezeblocks.BlockReader + downloader *TorrentClient } func newPeer(name string, caps []p2p.Cap) (*p2p.Peer, error) { @@ -61,13 +60,13 @@ func NewSentry(ctx context.Context, chain string, snapshotLocation string, peerC peers[peer.Pubkey()] = peer } - cfg := snapcfg.KnownCfg(chain) + cfg := snapcfg.KnownCfg(chain, 0) knownSnapshots := freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{ Enabled: true, Produce: false, NoDownloader: true, - }, "" /*s.snapshotVersion,*/, logger) + }, "", cfg.Version, logger) files := make([]string, 0, len(cfg.Preverified)) @@ -82,7 +81,7 @@ func NewSentry(ctx context.Context, chain string, snapshotLocation string, peerC Enabled: true, Produce: false, NoDownloader: true, - }, snapshotLocation /*s.snapshotVersion,*/, logger) + }, snapshotLocation, cfg.Version, logger) if err := activeSnapshots.ReopenFolder(); err != nil { return nil, err @@ -437,7 +436,7 @@ func (s *server) getHeaderByHash(ctx context.Context, hash common.Hash) (*core_t } func (s *server) downloadHeaders(ctx context.Context, header *freezeblocks.HeaderSegment) error { - fileName := snaptype.SegmentFileName(header.From(), header.To(), snaptype.Headers) + fileName := snaptype.SegmentFileName(s.knownSnapshots.Version(), header.From(), header.To(), snaptype.Headers) s.logger.Info(fmt.Sprintf("Downloading %s", fileName)) @@ -450,5 +449,5 @@ func (s *server) downloadHeaders(ctx context.Context, header *freezeblocks.Heade s.logger.Info(fmt.Sprintf("Indexing %s", fileName)) return freezeblocks.HeadersIdx(ctx, - filepath.Join(s.downloader.LocalFsRoot(), fileName), header.From(), s.downloader.LocalFsRoot(), nil, log.LvlDebug, s.logger) + filepath.Join(s.downloader.LocalFsRoot(), fileName), s.knownSnapshots.Version(), header.From(), s.downloader.LocalFsRoot(), nil, log.LvlDebug, s.logger) } diff --git a/p2p/sentry/simulator/syncutil.go b/p2p/sentry/simulator/syncutil.go index eb7980ceeee..c38877b4fc3 100644 --- a/p2p/sentry/simulator/syncutil.go +++ b/p2p/sentry/simulator/syncutil.go @@ -111,7 +111,7 @@ func NewTorrentClient(ctx context.Context, chain string, torrentDir string, logg } items := map[string]snapcfg.PreverifiedItem{} - for _, it := range snapcfg.KnownCfg(chain).Preverified { + for _, it := range snapcfg.KnownCfg(chain, 
0).Preverified { + items[it.Name] = it } diff --git a/turbo/app/README.md b/turbo/app/README.md new file mode 100644 index 00000000000..c7bc4b7f61d --- /dev/null +++ b/turbo/app/README.md @@ -0,0 +1,71 @@ +# Erigon Sub Commands + +## Backup + +## Import + +## Init + +## Support + +## Snapshots + +This sub command can be used for manipulating snapshot files + +### Uploader + +The `snapshots uploader` command starts a version of erigon customized for uploading snapshot files to +a remote location. + +It breaks the stage execution process after the senders stage and then uses the snapshot stage to write +headers, bodies and (in the case of polygon) bor spans and events to snapshot files for upload. Because +this process avoids execution it runs significantly faster than a standard erigon configuration. + +The uploader uses rclone to send seedable snapshot files (100K or 500K blocks) to a remote storage location specified +in the rclone config file. + +The **uploader** is configured to minimize disk usage by doing the following: + +* It removes snapshots once they are loaded +* It aggressively prunes the database once entities are transferred to snapshots + +In addition to this it has the following performance related features: + +* maximizes the workers allocated to snapshot processing to improve throughput +* Can be started from scratch by downloading the latest snapshots from the remote location to seed processing + +The following configuration can be used to upload blocks from genesis where: + +| Flag | Description | +|---|---| +| sync.loop.prune.limit=500000 | Sets the number of records to be pruned from the database to 500,000 per iteration (as opposed to the default of 100) | +| upload.location=r2:erigon-v2-snapshots-bor-mainnet | Specifies the rclone location to upload snapshots to | +| upload.from=earliest | Sets the upload start location to be the earliest available block, which will be 0 in the case of a fresh installation, or determined by the last block in the chaindata db | +| upload.snapshot.limit=1500000 | Tells the uploader to keep a maximum of 1,500,000 blocks in the `snapshots` directory before deleting the oldest snapshots | +| snapshot.version=2 | Indicates the version to be appended to snapshot file names when they are created | + + +```shell
erigon/build/bin/erigon snapshots uploader --datadir=~/snapshots/bor-mainnet --chain=bor-mainnet \ + --bor.heimdall=https://heimdall-api.polygon.technology --bor.milestone=false --sync.loop.prune.limit=500000 \ + --upload.location=r2:erigon-v2-snapshots-bor-mainnet --upload.from=earliest --snapshot.version=2 \ + --upload.snapshot.limit=1500000 +``` + +In order to start with the latest uploaded block when starting with an empty drive, set the `upload.from` flag to `latest`, e.g. + +```shell
--upload.from=latest +``` + +The configuration of the uploader implicitly sets the following flag values on start-up: + +```shell + --sync.loop.break.after=Senders + --sync.loop.block.limit=100000 + --sync.loop.prune.limit=100000 + --upload.snapshot.limit=1500000 + --nodownloader=true + --http.enabled=false + --txpool.disable=true +``` diff --git a/turbo/app/make_app.go b/turbo/app/make_app.go index a591af54265..d4e5f17bbb2 100644 --- a/turbo/app/make_app.go +++ b/turbo/app/make_app.go @@ -51,23 +51,8 @@ func MakeApp(name string, action cli.ActionFunc, cliFlags []cli.Flag) *cli.App { // run default action return action(context) } - app.Flags = append(cliFlags, debug.Flags...) // debug flags are required - app.Flags = append(app.Flags, utils.MetricFlags...) - app.Flags = append(app.Flags, logging.Flags...)
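For context, the uploader described in the README above drives rclone through the `RCloneClient`/`RCloneSession` API added by this change in `erigon-lib/downloader/rclone.go`. The sketch below is a minimal, illustrative use of that API based only on the calls visible in this patch (`NewRCloneClient`, `NewSession`, `Upload`); the datadir path, the `r2:` remote name and the segment file names are placeholder assumptions, and rclone must already be installed and configured.

```go
package main

import (
	"context"

	"github.com/ledgerwatch/erigon-lib/downloader"
	"github.com/ledgerwatch/log/v3"
)

func main() {
	ctx := context.Background()
	logger := log.New()

	// Start a local rclone instance; this fails if rclone is not on the PATH.
	rclone, err := downloader.NewRCloneClient(logger)
	if err != nil {
		panic(err)
	}

	// Bind a local snapshot directory to a remote defined in the rclone config
	// file (the "r2:" remote mirrors the README example and is an assumption).
	session, err := rclone.NewSession(ctx, "/data/erigon/snapshots", "r2:erigon-v2-snapshots-bor-mainnet")
	if err != nil {
		panic(err)
	}

	// Upload a seedable segment together with its .torrent file, as the
	// uploader stage does; the file names are illustrative.
	if err := session.Upload(ctx, "v1-000000-000500-headers.seg", "v1-000000-000500-headers.seg.torrent"); err != nil {
		logger.Warn("upload failed", "err", err)
	}
}
```

The same session type is what the uploader uses to read `manifest.txt`, list the remote directory and download the latest snapshots when starting from an empty datadir with `--upload.from=latest`.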
- app.Flags = append(app.Flags, &utils.ConfigFlag) - // remove exact duplicate flags, keeping only the first one. this will allow easier composition later down the line - allFlags := app.Flags - newFlags := make([]cli.Flag, 0, len(allFlags)) - seen := map[string]struct{}{} - for _, vv := range allFlags { - v := vv - if _, ok := seen[v.String()]; ok { - continue - } - newFlags = append(newFlags, v) - } - app.Flags = newFlags + app.Flags = appFlags(cliFlags) app.After = func(ctx *cli.Context) error { debug.Exit() @@ -83,6 +68,28 @@ func MakeApp(name string, action cli.ActionFunc, cliFlags []cli.Flag) *cli.App { return app } +func appFlags(cliFlags []cli.Flag) []cli.Flag { + + flags := append(cliFlags, debug.Flags...) // debug flags are required + flags = append(flags, utils.MetricFlags...) + flags = append(flags, logging.Flags...) + flags = append(flags, &utils.ConfigFlag) + + // remove exact duplicate flags, keeping only the first one. this will allow easier composition later down the line + allFlags := flags + newFlags := make([]cli.Flag, 0, len(allFlags)) + seen := map[string]struct{}{} + for _, vv := range allFlags { + v := vv + if _, ok := seen[v.String()]; ok { + continue + } + newFlags = append(newFlags, v) + } + + return newFlags +} + // MigrateFlags makes all global flag values available in the // context. This should be called as early as possible in app.Before. // diff --git a/turbo/app/snapshots_cmd.go b/turbo/app/snapshots_cmd.go index 482d15bf266..e55d48001ec 100644 --- a/turbo/app/snapshots_cmd.go +++ b/turbo/app/snapshots_cmd.go @@ -8,14 +8,17 @@ import ( "errors" "fmt" "io" + "net/http" "os" "path/filepath" "runtime" "time" "github.com/c2h5oh/datasize" + "github.com/ledgerwatch/erigon-lib/chain/snapcfg" "github.com/ledgerwatch/erigon-lib/common/dbg" "github.com/ledgerwatch/erigon-lib/common/dir" + "github.com/ledgerwatch/erigon-lib/metrics" "github.com/ledgerwatch/log/v3" "github.com/urfave/cli/v2" @@ -33,11 +36,15 @@ import ( "github.com/ledgerwatch/erigon/cmd/utils" "github.com/ledgerwatch/erigon/core/rawdb" "github.com/ledgerwatch/erigon/core/rawdb/blockio" + "github.com/ledgerwatch/erigon/diagnostics" "github.com/ledgerwatch/erigon/eth/ethconfig" "github.com/ledgerwatch/erigon/eth/ethconfig/estimate" "github.com/ledgerwatch/erigon/eth/stagedsync/stages" + "github.com/ledgerwatch/erigon/params" + erigoncli "github.com/ledgerwatch/erigon/turbo/cli" "github.com/ledgerwatch/erigon/turbo/debug" "github.com/ledgerwatch/erigon/turbo/logging" + "github.com/ledgerwatch/erigon/turbo/node" "github.com/ledgerwatch/erigon/turbo/snapshotsync/freezeblocks" ) @@ -81,6 +88,17 @@ var snapshotCommand = cli.Command{ &SnapshotEveryFlag, }), }, + { + Name: "uploader", + Action: doUploaderCommand, + Usage: "run erigon in snapshot upload mode (no execution)", + Flags: uploaderCommandFlags([]cli.Flag{ + &SnapshotVersionFlag, + &erigoncli.UploadLocationFlag, + &erigoncli.UploadFromFlag, + &erigoncli.FrozenBlockLimitFlag, + }), + }, { Name: "uncompress", Action: doUncompress, @@ -135,6 +153,11 @@ var ( Usage: "Do operation every N blocks", Value: 1_000, } + SnapshotVersionFlag = cli.IntFlag{ + Name: "snapshot.version", + Usage: "Snapshot files version.", + Value: 1, + } SnapshotRebuildFlag = cli.BoolFlag{ Name: "rebuild", Usage: "Force rebuild", @@ -259,7 +282,8 @@ func doIndicesCommand(cliCtx *cli.Context) error { } cfg := ethconfig.NewSnapCfg(true, false, true) - blockSnaps, borSnaps, br, agg, err := openSnaps(ctx, cfg, dirs, chainDB, logger) + blockSnaps, borSnaps, br, agg, err := 
openSnaps(ctx, cfg, dirs, snapcfg.KnownCfg(chainConfig.ChainName, 0).Version, chainDB, logger) + if err != nil { return err } @@ -277,20 +301,20 @@ func doIndicesCommand(cliCtx *cli.Context) error { return nil } -func openSnaps(ctx context.Context, cfg ethconfig.BlocksFreezing, dirs datadir.Dirs, chainDB kv.RwDB, logger log.Logger) ( +func openSnaps(ctx context.Context, cfg ethconfig.BlocksFreezing, dirs datadir.Dirs, version uint8, chainDB kv.RwDB, logger log.Logger) ( blockSnaps *freezeblocks.RoSnapshots, borSnaps *freezeblocks.BorRoSnapshots, br *freezeblocks.BlockRetire, agg *libstate.AggregatorV3, err error, ) { - blockSnaps = freezeblocks.NewRoSnapshots(cfg, dirs.Snap, logger) + blockSnaps = freezeblocks.NewRoSnapshots(cfg, dirs.Snap, version, logger) if err = blockSnaps.ReopenFolder(); err != nil { return } - blockSnaps.LogStat() + blockSnaps.LogStat("open") - borSnaps = freezeblocks.NewBorRoSnapshots(cfg, dirs.Snap, logger) + borSnaps = freezeblocks.NewBorRoSnapshots(cfg, dirs.Snap, version, logger) if err = borSnaps.ReopenFolder(); err != nil { return } - borSnaps.LogStat() + borSnaps.LogStat("open") agg, err = libstate.NewAggregatorV3(ctx, dirs.SnapHistory, dirs.Tmp, ethconfig.HistoryV3AggregationStep, chainDB, logger) if err != nil { @@ -425,11 +449,13 @@ func doRetireCommand(cliCtx *cli.Context) error { from := cliCtx.Uint64(SnapshotFromFlag.Name) to := cliCtx.Uint64(SnapshotToFlag.Name) every := cliCtx.Uint64(SnapshotEveryFlag.Name) + version := uint8(cliCtx.Int(SnapshotVersionFlag.Name)) + db := mdbx.NewMDBX(logger).Label(kv.ChainDB).Path(dirs.Chaindata).MustOpen() defer db.Close() cfg := ethconfig.NewSnapCfg(true, false, true) - blockSnaps, borSnaps, br, agg, err := openSnaps(ctx, cfg, dirs, db, logger) + blockSnaps, borSnaps, br, agg, err := openSnaps(ctx, cfg, dirs, version, db, logger) if err != nil { return err } @@ -458,7 +484,7 @@ func doRetireCommand(cliCtx *cli.Context) error { } logger.Info("Params", "from", from, "to", to, "every", every) - if err := br.RetireBlocks(ctx, forwardProgress, log.LvlInfo, nil, nil); err != nil { + if err := br.RetireBlocks(ctx, 0, forwardProgress, log.LvlInfo, nil, nil); err != nil { return err } @@ -554,3 +580,61 @@ func doRetireCommand(cliCtx *cli.Context) error { return nil } + +func uploaderCommandFlags(flags []cli.Flag) []cli.Flag { + erigoncli.SyncLoopBreakAfterFlag.Value = "Senders" + erigoncli.SyncLoopBlockLimitFlag.Value = 100000 + erigoncli.SyncLoopPruneLimitFlag.Value = 100000 + erigoncli.FrozenBlockLimitFlag.Value = 1500000 + utils.NoDownloaderFlag.Value = true + utils.HTTPEnabledFlag.Value = false + utils.TxPoolDisableFlag.Value = true + return joinFlags(erigoncli.DefaultFlags, flags, []cli.Flag{ + &erigoncli.SyncLoopBreakAfterFlag, + &erigoncli.SyncLoopBlockLimitFlag, + &erigoncli.SyncLoopPruneLimitFlag, + }) +} + +func doUploaderCommand(cliCtx *cli.Context) error { + var logger log.Logger + var err error + var metricsMux *http.ServeMux + + if logger, metricsMux, err = debug.Setup(cliCtx, true /* root logger */); err != nil { + return err + } + + // initializing the node and providing the current git commit there + + logger.Info("Build info", "git_branch", params.GitBranch, "git_tag", params.GitTag, "git_commit", params.GitCommit) + erigonInfoGauge := metrics.GetOrCreateGauge(fmt.Sprintf(`erigon_info{version="%s",commit="%s"}`, params.Version, params.GitCommit)) + erigonInfoGauge.Set(1) + + if version := uint8(cliCtx.Int(SnapshotVersionFlag.Name)); version != 0 { + snapcfg.SnapshotVersion(version) + } + + nodeCfg := 
node.NewNodConfigUrfave(cliCtx, logger) + if err := datadir.ApplyMigrations(nodeCfg.Dirs); err != nil { + return err + } + + ethCfg := node.NewEthConfigUrfave(cliCtx, nodeCfg, logger) + + ethNode, err := node.New(cliCtx.Context, nodeCfg, ethCfg, logger) + if err != nil { + log.Error("Erigon startup", "err", err) + return err + } + + if metricsMux != nil { + diagnostics.Setup(cliCtx, metricsMux, ethNode) + } + + err = ethNode.Serve() + if err != nil { + log.Error("error while serving an Erigon node", "err", err) + } + return err +} diff --git a/turbo/cli/flags.go b/turbo/cli/flags.go index 748fab507f3..339239bc49d 100644 --- a/turbo/cli/flags.go +++ b/turbo/cli/flags.go @@ -10,6 +10,7 @@ import ( libcommon "github.com/ledgerwatch/erigon-lib/common" + "github.com/ledgerwatch/erigon/rpc" "github.com/ledgerwatch/erigon/rpc/rpccfg" "github.com/c2h5oh/datasize" @@ -148,6 +149,42 @@ var ( Value: "", } + SyncLoopPruneLimitFlag = cli.UintFlag{ + Name: "sync.loop.prune.limit", + Usage: "Sets the maximum number of block to prune per loop iteration", + Value: 100, + } + + SyncLoopBreakAfterFlag = cli.StringFlag{ + Name: "sync.loop.break", + Usage: "Sets the last stage of the sync loop to run", + Value: "", + } + + SyncLoopBlockLimitFlag = cli.UintFlag{ + Name: "sync.loop.block.limit", + Usage: "Sets the maximum number of blocks to process per loop iteration", + Value: 0, // unlimited + } + + UploadLocationFlag = cli.StringFlag{ + Name: "upload.location", + Usage: "Location to upload snapshot segments to", + Value: "", + } + + UploadFromFlag = cli.StringFlag{ + Name: "upload.from", + Usage: "Blocks to upload from: number, or 'earliest' (start of the chain), 'latest' (last segment previously uploaded)", + Value: "latest", + } + + FrozenBlockLimitFlag = cli.UintFlag{ + Name: "upload.snapshot.limit", + Usage: "Sets the maximum number of snapshot blocks to hold on the local disk when uploading", + Value: 1500000, + } + BadBlockFlag = cli.StringFlag{ Name: "bad.block", Usage: "Marks block with given hex string as bad and forces initial reorg before normal staged sync", @@ -255,6 +292,32 @@ func ApplyFlagsForEthConfig(ctx *cli.Context, cfg *ethconfig.Config, logger log. 
cfg.Sync.LoopThrottle = syncLoopThrottle } + if limit := ctx.Uint(SyncLoopPruneLimitFlag.Name); limit > 0 { + cfg.Sync.PruneLimit = int(limit) + } + + if stage := ctx.String(SyncLoopBreakAfterFlag.Name); len(stage) > 0 { + cfg.Sync.BreakAfterStage = stage + } + + if limit := ctx.Uint(SyncLoopBlockLimitFlag.Name); limit > 0 { + cfg.Sync.LoopBlockLimit = limit + } + + if location := ctx.String(UploadLocationFlag.Name); len(location) > 0 { + cfg.Sync.UploadLocation = location + } + + if blockno := ctx.String(UploadFromFlag.Name); len(blockno) > 0 { + cfg.Sync.UploadFrom = rpc.AsBlockNumber(blockno) + } else { + cfg.Sync.UploadFrom = rpc.LatestBlockNumber + } + + if limit := ctx.Uint(FrozenBlockLimitFlag.Name); limit > 0 { + cfg.Sync.FrozenBlockLimit = uint64(limit) + } + if ctx.String(BadBlockFlag.Name) != "" { bytes, err := hexutil.Decode(ctx.String(BadBlockFlag.Name)) if err != nil { @@ -354,7 +417,6 @@ func setEmbeddedRpcDaemon(ctx *cli.Context, cfg *nodecfg.Config, logger log.Logg } apis := ctx.String(utils.HTTPApiFlag.Name) - logger.Info("starting HTTP APIs", "APIs", apis) c := &httpcfg.HttpCfg{ Enabled: ctx.Bool(utils.HTTPEnabledFlag.Name), @@ -408,6 +470,11 @@ func setEmbeddedRpcDaemon(ctx *cli.Context, cfg *nodecfg.Config, logger log.Logg StateCache: kvcache.DefaultCoherentConfig, RPCSlowLogThreshold: ctx.Duration(utils.RPCSlowFlag.Name), } + + if c.Enabled { + logger.Info("starting HTTP APIs", "APIs", apis) + } + if ctx.IsSet(utils.HttpCompressionFlag.Name) { c.HttpCompression = ctx.Bool(utils.HttpCompressionFlag.Name) } else { diff --git a/turbo/debug/flags.go b/turbo/debug/flags.go index 1be6efa51b3..a2bd0f10a94 100644 --- a/turbo/debug/flags.go +++ b/turbo/debug/flags.go @@ -184,7 +184,7 @@ func Setup(ctx *cli.Context, rootLogger bool) (log.Logger, *http.ServeMux, error RaiseFdLimit() - logger := logging.SetupLoggerCtx("erigon", ctx, rootLogger) + logger := logging.SetupLoggerCtx("erigon", ctx, log.LvlInfo, log.LvlInfo, rootLogger) if traceFile := ctx.String(traceFlag.Name); traceFile != "" { if err := Handler.StartGoTrace(traceFile); err != nil { diff --git a/turbo/execution/eth1/ethereum_execution.go b/turbo/execution/eth1/ethereum_execution.go index baa1e8e82df..2fdc2121a79 100644 --- a/turbo/execution/eth1/ethereum_execution.go +++ b/turbo/execution/eth1/ethereum_execution.go @@ -229,18 +229,25 @@ func (e *EthereumExecutionModule) purgeBadChain(ctx context.Context, tx kv.RwTx, func (e *EthereumExecutionModule) Start(ctx context.Context) { e.semaphore.Acquire(ctx, 1) defer e.semaphore.Release(1) - // Run the forkchoice - if err := e.executionPipeline.Run(e.db, nil, true); err != nil { - if !errors.Is(err, context.Canceled) { - e.logger.Error("Could not start execution service", "err", err) + + more := true + + for more { + var err error + + if more, err = e.executionPipeline.Run(e.db, nil, true); err != nil { + if !errors.Is(err, context.Canceled) { + e.logger.Error("Could not start execution service", "err", err) + } + continue } - return - } - if err := e.executionPipeline.RunPrune(e.db, nil, true); err != nil { - if !errors.Is(err, context.Canceled) { - e.logger.Error("Could not start execution service", "err", err) + + if err := e.executionPipeline.RunPrune(e.db, nil, true); err != nil { + if !errors.Is(err, context.Canceled) { + e.logger.Error("Could not start execution service", "err", err) + } + continue } - return } } diff --git a/turbo/execution/eth1/forkchoice.go b/turbo/execution/eth1/forkchoice.go index 1ed0851c587..574735846c6 100644 --- 
a/turbo/execution/eth1/forkchoice.go +++ b/turbo/execution/eth1/forkchoice.go @@ -305,7 +305,7 @@ func (e *EthereumExecutionModule) updateForkChoice(ctx context.Context, blockHas } } // Run the forkchoice - if err := e.executionPipeline.Run(e.db, tx, false); err != nil { + if _, err := e.executionPipeline.Run(e.db, tx, false); err != nil { err = fmt.Errorf("updateForkChoice: %w", err) sendForkchoiceErrorWithoutWaiting(outcomeCh, err) return diff --git a/turbo/logging/logging.go b/turbo/logging/logging.go index 988fa7fb5da..f36b0999b4c 100644 --- a/turbo/logging/logging.go +++ b/turbo/logging/logging.go @@ -3,7 +3,6 @@ package logging import ( "flag" "os" - "path" "path/filepath" "strconv" @@ -21,7 +20,8 @@ import ( // This function which is used in Erigon itself. // Note: urfave and cobra are two CLI frameworks/libraries for the same functionalities // and it would make sense to choose one over another -func SetupLoggerCtx(filePrefix string, ctx *cli.Context, rootHandler bool) log.Logger { +func SetupLoggerCtx(filePrefix string, ctx *cli.Context, + consoleDefaultLevel log.Lvl, dirDefaultLevel log.Lvl, rootHandler bool) log.Logger { var consoleJson = ctx.Bool(LogJsonFlag.Name) || ctx.Bool(LogConsoleJsonFlag.Name) var dirJson = ctx.Bool(LogDirJsonFlag.Name) @@ -30,13 +30,13 @@ func SetupLoggerCtx(filePrefix string, ctx *cli.Context, rootHandler bool) log.L // try verbosity flag consoleLevel, lErr = tryGetLogLevel(ctx.String(LogVerbosityFlag.Name)) if lErr != nil { - consoleLevel = log.LvlInfo + consoleLevel = consoleDefaultLevel } } dirLevel, dErr := tryGetLogLevel(ctx.String(LogDirVerbosityFlag.Name)) if dErr != nil { - dirLevel = log.LvlInfo + dirLevel = dirDefaultLevel } dirPath := "" @@ -202,7 +202,7 @@ func initSeparatedLogging( } lumberjack := &lumberjack.Logger{ - Filename: path.Join(dirPath, filePrefix+".log"), + Filename: filepath.Join(dirPath, filePrefix+".log"), MaxSize: 100, // megabytes MaxBackups: 3, MaxAge: 28, //days diff --git a/turbo/services/interfaces.go b/turbo/services/interfaces.go index 130337fbf3d..0857520ac88 100644 --- a/turbo/services/interfaces.go +++ b/turbo/services/interfaces.go @@ -90,17 +90,19 @@ type FullBlockReader interface { } type BlockSnapshots interface { - LogStat() + LogStat(label string) ReopenFolder() error SegmentsMax() uint64 + SegmentsMin() uint64 } // BlockRetire - freezing blocks: moving old data from DB to snapshot files type BlockRetire interface { PruneAncientBlocks(tx kv.RwTx, limit int) error - RetireBlocksInBackground(ctx context.Context, maxBlockNumInDB uint64, lvl log.Lvl, seedNewSnapshots func(downloadRequest []DownloadRequest) error, onDelete func(l []string) error) + RetireBlocksInBackground(ctx context.Context, miBlockNum uint64, maxBlockNum uint64, lvl log.Lvl, seedNewSnapshots func(downloadRequest []DownloadRequest) error, onDelete func(l []string) error) HasNewFrozenFiles() bool BuildMissedIndicesIfNeed(ctx context.Context, logPrefix string, notifier DBEventNotifier, cc *chain.Config) error + SetWorkers(workers int) } /* @@ -124,6 +126,7 @@ type DBEventNotifier interface { } type DownloadRequest struct { + Version uint8 Path string TorrentHash string } diff --git a/turbo/snapshotsync/freezeblocks/block_reader.go b/turbo/snapshotsync/freezeblocks/block_reader.go index 6b384929ce3..e58e7cdf209 100644 --- a/turbo/snapshotsync/freezeblocks/block_reader.go +++ b/turbo/snapshotsync/freezeblocks/block_reader.go @@ -248,12 +248,7 @@ type BlockReader struct { } func NewBlockReader(snapshots services.BlockSnapshots, borSnapshots 
services.BlockSnapshots) *BlockReader { - var borSn *BorRoSnapshots - - if borSnapshots != nil { - borSn = borSnapshots.(*BorRoSnapshots) - } - + borSn, _ := borSnapshots.(*BorRoSnapshots) return &BlockReader{sn: snapshots.(*RoSnapshots), borSn: borSn} } @@ -269,8 +264,13 @@ func (r *BlockReader) BorSnapshots() services.BlockSnapshots { return nil } -func (r *BlockReader) FrozenBlocks() uint64 { return r.sn.BlocksAvailable() } -func (r *BlockReader) FrozenBorBlocks() uint64 { return r.borSn.BlocksAvailable() } +func (r *BlockReader) FrozenBlocks() uint64 { return r.sn.BlocksAvailable() } +func (r *BlockReader) FrozenBorBlocks() uint64 { + if r.borSn != nil { + return r.borSn.BlocksAvailable() + } + return 0 +} func (r *BlockReader) FrozenFiles() []string { files := r.sn.Files() if r.borSn != nil { @@ -286,16 +286,18 @@ func (r *BlockReader) HeadersRange(ctx context.Context, walker func(header *type } func (r *BlockReader) HeaderByNumber(ctx context.Context, tx kv.Getter, blockHeight uint64) (h *types.Header, err error) { - blockHash, err := rawdb.ReadCanonicalHash(tx, blockHeight) - if err != nil { - return nil, err - } - if blockHash == (common.Hash{}) { - return nil, nil - } - h = rawdb.ReadHeader(tx, blockHash, blockHeight) - if h != nil { - return h, nil + if tx != nil { + blockHash, err := rawdb.ReadCanonicalHash(tx, blockHeight) + if err != nil { + return nil, err + } + if blockHash == (common.Hash{}) { + return nil, nil + } + h = rawdb.ReadHeader(tx, blockHash, blockHeight) + if h != nil { + return h, nil + } } view := r.sn.View() @@ -394,13 +396,14 @@ func (r *BlockReader) Header(ctx context.Context, tx kv.Getter, hash common.Hash } func (r *BlockReader) BodyWithTransactions(ctx context.Context, tx kv.Getter, hash common.Hash, blockHeight uint64) (body *types.Body, err error) { - - body, err = rawdb.ReadBodyWithTransactions(tx, hash, blockHeight) - if err != nil { - return nil, err - } - if body != nil { - return body, nil + if tx != nil { + body, err = rawdb.ReadBodyWithTransactions(tx, hash, blockHeight) + if err != nil { + return nil, err + } + if body != nil { + return body, nil + } } view := r.sn.View() @@ -962,6 +965,10 @@ func (r *BlockReader) EventLookup(ctx context.Context, tx kv.Getter, txnHash com return *n, true, nil } + if r.borSn == nil { + return 0, false, nil + } + view := r.borSn.View() defer view.Close() @@ -1080,6 +1087,10 @@ func (r *BlockReader) EventsByBlock(ctx context.Context, tx kv.Tx, hash common.H } func (r *BlockReader) LastFrozenEventID() uint64 { + if r.borSn == nil { + return 0 + } + view := r.borSn.View() defer view.Close() segments := view.Events() @@ -1108,6 +1119,10 @@ func (r *BlockReader) LastFrozenEventID() uint64 { } func (r *BlockReader) LastFrozenSpanID() uint64 { + if r.borSn == nil { + return 0 + } + view := r.borSn.View() defer view.Close() segments := view.Spans() diff --git a/turbo/snapshotsync/freezeblocks/block_reader_test.go b/turbo/snapshotsync/freezeblocks/block_reader_test.go index 8f1347df9a7..a408ea2b820 100644 --- a/turbo/snapshotsync/freezeblocks/block_reader_test.go +++ b/turbo/snapshotsync/freezeblocks/block_reader_test.go @@ -24,8 +24,8 @@ func TestBlockReaderLastFrozenSpanIDWhenSegmentFilesArePresent(t *testing.T) { logger := testlog.Logger(t, log.LvlInfo) dir := t.TempDir() createTestBorEventSegmentFile(t, 0, 500_000, 132, dir, logger) - createTestSegmentFile(t, 0, 500_000, snaptype.BorSpans, dir, logger) - borRoSnapshots := NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, logger) + createTestSegmentFile(t, 
0, 500_000, snaptype.BorSpans, dir, 1, logger) + borRoSnapshots := NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, 1, logger) defer borRoSnapshots.Close() err := borRoSnapshots.ReopenFolder() require.NoError(t, err) @@ -39,7 +39,7 @@ func TestBlockReaderLastFrozenSpanIDWhenSegmentFilesAreNotPresent(t *testing.T) logger := testlog.Logger(t, log.LvlInfo) dir := t.TempDir() - borRoSnapshots := NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, logger) + borRoSnapshots := NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, 1, logger) defer borRoSnapshots.Close() err := borRoSnapshots.ReopenFolder() require.NoError(t, err) @@ -56,14 +56,14 @@ func TestBlockReaderLastFrozenSpanIDReturnsLastSegWithIdx(t *testing.T) { createTestBorEventSegmentFile(t, 0, 500_000, 132, dir, logger) createTestBorEventSegmentFile(t, 500_000, 1_000_000, 264, dir, logger) createTestBorEventSegmentFile(t, 1_000_000, 1_500_000, 528, dir, logger) - createTestSegmentFile(t, 0, 500_000, snaptype.BorSpans, dir, logger) - createTestSegmentFile(t, 500_000, 1_000_000, snaptype.BorSpans, dir, logger) - createTestSegmentFile(t, 1_000_000, 1_500_000, snaptype.BorSpans, dir, logger) + createTestSegmentFile(t, 0, 500_000, snaptype.BorSpans, dir, 1, logger) + createTestSegmentFile(t, 500_000, 1_000_000, snaptype.BorSpans, dir, 1, logger) + createTestSegmentFile(t, 1_000_000, 1_500_000, snaptype.BorSpans, dir, 1, logger) // delete idx file for last bor span segment to simulate segment with missing idx file - idxFileToDelete := filepath.Join(dir, snaptype.IdxFileName(1_000_000, 1_500_000, snaptype.BorSpans.String())) + idxFileToDelete := filepath.Join(dir, snaptype.IdxFileName(1, 1_000_000, 1_500_000, snaptype.BorSpans.String())) err := os.Remove(idxFileToDelete) require.NoError(t, err) - borRoSnapshots := NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, logger) + borRoSnapshots := NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, 1, logger) defer borRoSnapshots.Close() err = borRoSnapshots.ReopenFolder() require.NoError(t, err) @@ -80,20 +80,20 @@ func TestBlockReaderLastFrozenSpanIDReturnsZeroWhenAllSegmentsDoNotHaveIdx(t *te createTestBorEventSegmentFile(t, 0, 500_000, 132, dir, logger) createTestBorEventSegmentFile(t, 500_000, 1_000_000, 264, dir, logger) createTestBorEventSegmentFile(t, 1_000_000, 1_500_000, 528, dir, logger) - createTestSegmentFile(t, 0, 500_000, snaptype.BorSpans, dir, logger) - createTestSegmentFile(t, 500_000, 1_000_000, snaptype.BorSpans, dir, logger) - createTestSegmentFile(t, 1_000_000, 1_500_000, snaptype.BorSpans, dir, logger) + createTestSegmentFile(t, 0, 500_000, snaptype.BorSpans, dir, 1, logger) + createTestSegmentFile(t, 500_000, 1_000_000, snaptype.BorSpans, dir, 1, logger) + createTestSegmentFile(t, 1_000_000, 1_500_000, snaptype.BorSpans, dir, 1, logger) // delete idx file for all bor span segments to simulate segments with missing idx files - idxFileToDelete := filepath.Join(dir, snaptype.IdxFileName(0, 500_000, snaptype.BorSpans.String())) + idxFileToDelete := filepath.Join(dir, snaptype.IdxFileName(1, 1, 500_000, snaptype.BorSpans.String())) err := os.Remove(idxFileToDelete) require.NoError(t, err) - idxFileToDelete = filepath.Join(dir, snaptype.IdxFileName(500_000, 1_000_000, snaptype.BorSpans.String())) + idxFileToDelete = filepath.Join(dir, snaptype.IdxFileName(1, 500_000, 1_000_000, snaptype.BorSpans.String())) err = os.Remove(idxFileToDelete) require.NoError(t, err) - idxFileToDelete = filepath.Join(dir, 
snaptype.IdxFileName(1_000_000, 1_500_000, snaptype.BorSpans.String())) + idxFileToDelete = filepath.Join(dir, snaptype.IdxFileName(1, 1_000_000, 1_500_000, snaptype.BorSpans.String())) err = os.Remove(idxFileToDelete) require.NoError(t, err) - borRoSnapshots := NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, logger) + borRoSnapshots := NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, 1, logger) defer borRoSnapshots.Close() err = borRoSnapshots.ReopenFolder() require.NoError(t, err) @@ -108,8 +108,8 @@ func TestBlockReaderLastFrozenEventIDWhenSegmentFilesArePresent(t *testing.T) { logger := testlog.Logger(t, log.LvlInfo) dir := t.TempDir() createTestBorEventSegmentFile(t, 0, 500_000, 132, dir, logger) - createTestSegmentFile(t, 0, 500_000, snaptype.BorSpans, dir, logger) - borRoSnapshots := NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, logger) + createTestSegmentFile(t, 0, 500_000, snaptype.BorSpans, dir, 1, logger) + borRoSnapshots := NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, 1, logger) defer borRoSnapshots.Close() err := borRoSnapshots.ReopenFolder() require.NoError(t, err) @@ -123,7 +123,7 @@ func TestBlockReaderLastFrozenEventIDWhenSegmentFilesAreNotPresent(t *testing.T) logger := testlog.Logger(t, log.LvlInfo) dir := t.TempDir() - borRoSnapshots := NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, logger) + borRoSnapshots := NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, 1, logger) defer borRoSnapshots.Close() err := borRoSnapshots.ReopenFolder() require.NoError(t, err) @@ -140,14 +140,14 @@ func TestBlockReaderLastFrozenEventIDReturnsLastSegWithIdx(t *testing.T) { createTestBorEventSegmentFile(t, 0, 500_000, 132, dir, logger) createTestBorEventSegmentFile(t, 500_000, 1_000_000, 264, dir, logger) createTestBorEventSegmentFile(t, 1_000_000, 1_500_000, 528, dir, logger) - createTestSegmentFile(t, 0, 500_000, snaptype.BorSpans, dir, logger) - createTestSegmentFile(t, 500_000, 1_000_000, snaptype.BorSpans, dir, logger) - createTestSegmentFile(t, 1_000_000, 1_500_000, snaptype.BorSpans, dir, logger) + createTestSegmentFile(t, 0, 500_000, snaptype.BorSpans, dir, 1, logger) + createTestSegmentFile(t, 500_000, 1_000_000, snaptype.BorSpans, dir, 1, logger) + createTestSegmentFile(t, 1_000_000, 1_500_000, snaptype.BorSpans, dir, 1, logger) // delete idx file for last bor events segment to simulate segment with missing idx file - idxFileToDelete := filepath.Join(dir, snaptype.IdxFileName(1_000_000, 1_500_000, snaptype.BorEvents.String())) + idxFileToDelete := filepath.Join(dir, snaptype.IdxFileName(1, 1_000_000, 1_500_000, snaptype.BorEvents.String())) err := os.Remove(idxFileToDelete) require.NoError(t, err) - borRoSnapshots := NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, logger) + borRoSnapshots := NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, 1, logger) defer borRoSnapshots.Close() err = borRoSnapshots.ReopenFolder() require.NoError(t, err) @@ -164,20 +164,20 @@ func TestBlockReaderLastFrozenEventIDReturnsZeroWhenAllSegmentsDoNotHaveIdx(t *t createTestBorEventSegmentFile(t, 0, 500_000, 132, dir, logger) createTestBorEventSegmentFile(t, 500_000, 1_000_000, 264, dir, logger) createTestBorEventSegmentFile(t, 1_000_000, 1_500_000, 528, dir, logger) - createTestSegmentFile(t, 0, 500_000, snaptype.BorSpans, dir, logger) - createTestSegmentFile(t, 500_000, 1_000_000, snaptype.BorSpans, dir, logger) - createTestSegmentFile(t, 1_000_000, 1_500_000, 
snaptype.BorSpans, dir, logger) + createTestSegmentFile(t, 0, 500_000, snaptype.BorSpans, dir, 1, logger) + createTestSegmentFile(t, 500_000, 1_000_000, snaptype.BorSpans, dir, 1, logger) + createTestSegmentFile(t, 1_000_000, 1_500_000, snaptype.BorSpans, dir, 1, logger) // delete idx files for all bor events segment to simulate segment files with missing idx files - idxFileToDelete := filepath.Join(dir, snaptype.IdxFileName(0, 500_000, snaptype.BorEvents.String())) + idxFileToDelete := filepath.Join(dir, snaptype.IdxFileName(1, 0, 500_000, snaptype.BorEvents.String())) err := os.Remove(idxFileToDelete) require.NoError(t, err) - idxFileToDelete = filepath.Join(dir, snaptype.IdxFileName(500_000, 1_000_000, snaptype.BorEvents.String())) + idxFileToDelete = filepath.Join(dir, snaptype.IdxFileName(1, 500_000, 1_000_000, snaptype.BorEvents.String())) err = os.Remove(idxFileToDelete) require.NoError(t, err) - idxFileToDelete = filepath.Join(dir, snaptype.IdxFileName(1_000_000, 1_500_000, snaptype.BorEvents.String())) + idxFileToDelete = filepath.Join(dir, snaptype.IdxFileName(1, 1_000_000, 1_500_000, snaptype.BorEvents.String())) err = os.Remove(idxFileToDelete) require.NoError(t, err) - borRoSnapshots := NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, logger) + borRoSnapshots := NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, 1, logger) defer borRoSnapshots.Close() err = borRoSnapshots.ReopenFolder() require.NoError(t, err) @@ -190,7 +190,7 @@ func createTestBorEventSegmentFile(t *testing.T, from, to, eventId uint64, dir s compressor, err := compress.NewCompressor( context.Background(), "test", - filepath.Join(dir, snaptype.SegmentFileName(from, to, snaptype.BorEvents)), + filepath.Join(dir, snaptype.SegmentFileName(1, from, to, snaptype.BorEvents)), dir, 100, 1, @@ -211,7 +211,7 @@ func createTestBorEventSegmentFile(t *testing.T, from, to, eventId uint64, dir s KeyCount: 1, BucketSize: 10, TmpDir: dir, - IndexFile: filepath.Join(dir, snaptype.IdxFileName(from, to, snaptype.BorEvents.String())), + IndexFile: filepath.Join(dir, snaptype.IdxFileName(1, from, to, snaptype.BorEvents.String())), LeafSize: 8, }, logger, diff --git a/turbo/snapshotsync/freezeblocks/block_snapshots.go b/turbo/snapshotsync/freezeblocks/block_snapshots.go index 36c3f5198df..3dbe62f06ab 100644 --- a/turbo/snapshotsync/freezeblocks/block_snapshots.go +++ b/turbo/snapshotsync/freezeblocks/block_snapshots.go @@ -8,7 +8,6 @@ import ( "errors" "fmt" "os" - "path" "path/filepath" "reflect" "runtime" @@ -58,12 +57,14 @@ type HeaderSegment struct { seg *compress.Decompressor // value: first_byte_of_header_hash + header_rlp idxHeaderHash *recsplit.Index // header_hash -> headers_segment_offset Range + version uint8 } type BodySegment struct { seg *compress.Decompressor // value: rlp(types.BodyForStorage) idxBodyNumber *recsplit.Index // block_num_u64 -> bodies_segment_offset Range + version uint8 } type TxnSegment struct { @@ -71,6 +72,7 @@ type TxnSegment struct { IdxTxnHash *recsplit.Index // transaction_hash -> transactions_segment_offset IdxTxnHash2BlockNum *recsplit.Index // transaction_hash -> block_number Range + version uint8 } func (sn *HeaderSegment) closeIdx() { @@ -89,10 +91,25 @@ func (sn *HeaderSegment) close() { sn.closeSeg() sn.closeIdx() } + +func (sn *HeaderSegment) openFiles() []string { + var files []string + + if sn.seg.IsOpen() { + files = append(files, sn.seg.FilePath()) + } + + if sn.idxHeaderHash != nil { + files = append(files, sn.idxHeaderHash.FilePath()) + } + + return 
files +} + func (sn *HeaderSegment) reopenSeg(dir string) (err error) { sn.closeSeg() - fileName := snaptype.SegmentFileName(sn.from, sn.to, snaptype.Headers) - sn.seg, err = compress.NewDecompressor(path.Join(dir, fileName)) + fileName := snaptype.SegmentFileName(sn.version, sn.from, sn.to, snaptype.Headers) + sn.seg, err = compress.NewDecompressor(filepath.Join(dir, fileName)) if err != nil { return fmt.Errorf("%w, fileName: %s", err, fileName) } @@ -119,8 +136,8 @@ func (sn *HeaderSegment) reopenIdx(dir string) (err error) { if sn.seg == nil { return nil } - fileName := snaptype.IdxFileName(sn.from, sn.to, snaptype.Headers.String()) - sn.idxHeaderHash, err = recsplit.OpenIndex(path.Join(dir, fileName)) + fileName := snaptype.IdxFileName(sn.version, sn.from, sn.to, snaptype.Headers.String()) + sn.idxHeaderHash, err = recsplit.OpenIndex(filepath.Join(dir, fileName)) if err != nil { return fmt.Errorf("%w, fileName: %s", err, fileName) } @@ -145,10 +162,24 @@ func (sn *BodySegment) close() { sn.closeIdx() } +func (sn *BodySegment) openFiles() []string { + var files []string + + if sn.seg.IsOpen() { + files = append(files, sn.seg.FilePath()) + } + + if sn.idxBodyNumber != nil { + files = append(files, sn.idxBodyNumber.FilePath()) + } + + return files +} + func (sn *BodySegment) reopenSeg(dir string) (err error) { sn.closeSeg() - fileName := snaptype.SegmentFileName(sn.from, sn.to, snaptype.Bodies) - sn.seg, err = compress.NewDecompressor(path.Join(dir, fileName)) + fileName := snaptype.SegmentFileName(sn.version, sn.from, sn.to, snaptype.Bodies) + sn.seg, err = compress.NewDecompressor(filepath.Join(dir, fileName)) if err != nil { return fmt.Errorf("%w, fileName: %s", err, fileName) } @@ -176,8 +207,8 @@ func (sn *BodySegment) reopenIdx(dir string) (err error) { if sn.seg == nil { return nil } - fileName := snaptype.IdxFileName(sn.from, sn.to, snaptype.Bodies.String()) - sn.idxBodyNumber, err = recsplit.OpenIndex(path.Join(dir, fileName)) + fileName := snaptype.IdxFileName(sn.version, sn.from, sn.to, snaptype.Bodies.String()) + sn.idxBodyNumber, err = recsplit.OpenIndex(filepath.Join(dir, fileName)) if err != nil { return fmt.Errorf("%w, fileName: %s", err, fileName) } @@ -204,10 +235,29 @@ func (sn *TxnSegment) close() { sn.closeSeg() sn.closeIdx() } + +func (sn *TxnSegment) openFiles() []string { + var files []string + + if sn.Seg.IsOpen() { + files = append(files, sn.Seg.FilePath()) + } + + if sn.IdxTxnHash != nil && sn.IdxTxnHash.IsOpen() { + files = append(files, sn.IdxTxnHash.FilePath()) + } + + if sn.IdxTxnHash2BlockNum != nil && sn.IdxTxnHash2BlockNum.IsOpen() { + files = append(files, sn.IdxTxnHash2BlockNum.FilePath()) + } + + return files +} + func (sn *TxnSegment) reopenSeg(dir string) (err error) { sn.closeSeg() - fileName := snaptype.SegmentFileName(sn.from, sn.to, snaptype.Transactions) - sn.Seg, err = compress.NewDecompressor(path.Join(dir, fileName)) + fileName := snaptype.SegmentFileName(sn.version, sn.from, sn.to, snaptype.Transactions) + sn.Seg, err = compress.NewDecompressor(filepath.Join(dir, fileName)) if err != nil { return fmt.Errorf("%w, fileName: %s", err, fileName) } @@ -218,8 +268,8 @@ func (sn *TxnSegment) reopenIdx(dir string) (err error) { if sn.Seg == nil { return nil } - fileName := snaptype.IdxFileName(sn.from, sn.to, snaptype.Transactions.String()) - sn.IdxTxnHash, err = recsplit.OpenIndex(path.Join(dir, fileName)) + fileName := snaptype.IdxFileName(sn.version, sn.from, sn.to, snaptype.Transactions.String()) + sn.IdxTxnHash, err = 
recsplit.OpenIndex(filepath.Join(dir, fileName)) if err != nil { return fmt.Errorf("%w, fileName: %s", err, fileName) } @@ -239,8 +289,8 @@ func (sn *TxnSegment) reopenIdx(dir string) (err error) { } */ - fileName = snaptype.IdxFileName(sn.from, sn.to, snaptype.Transactions2Block.String()) - sn.IdxTxnHash2BlockNum, err = recsplit.OpenIndex(path.Join(dir, fileName)) + fileName = snaptype.IdxFileName(sn.version, sn.from, sn.to, snaptype.Transactions2Block.String()) + sn.IdxTxnHash2BlockNum, err = recsplit.OpenIndex(filepath.Join(dir, fileName)) if err != nil { return fmt.Errorf("%w, fileName: %s", err, fileName) } @@ -331,7 +381,11 @@ type RoSnapshots struct { segmentsMax atomic.Uint64 // all types of .seg files are available - up to this number idxMax atomic.Uint64 // all types of .idx files are available - up to this number cfg ethconfig.BlocksFreezing + version uint8 logger log.Logger + + // allows for pruning segments - this is the min availible segment + segmentsMin atomic.Uint64 } // NewRoSnapshots - opens all snapshots. But to simplify everything: @@ -339,21 +393,24 @@ type RoSnapshots struct { // - all snapshots of given blocks range must exist - to make this blocks range available // - gaps are not allowed // - segment have [from:to) semantic -func NewRoSnapshots(cfg ethconfig.BlocksFreezing, snapDir string, logger log.Logger) *RoSnapshots { - return &RoSnapshots{dir: snapDir, cfg: cfg, Headers: &headerSegments{}, Bodies: &bodySegments{}, Txs: &txnSegments{}, logger: logger} +func NewRoSnapshots(cfg ethconfig.BlocksFreezing, snapDir string, version uint8, logger log.Logger) *RoSnapshots { + return &RoSnapshots{dir: snapDir, cfg: cfg, version: version, Headers: &headerSegments{}, Bodies: &bodySegments{}, Txs: &txnSegments{}, logger: logger} } +func (s *RoSnapshots) Version() uint8 { return s.version } func (s *RoSnapshots) Cfg() ethconfig.BlocksFreezing { return s.cfg } func (s *RoSnapshots) Dir() string { return s.dir } func (s *RoSnapshots) SegmentsReady() bool { return s.segmentsReady.Load() } func (s *RoSnapshots) IndicesReady() bool { return s.indicesReady.Load() } func (s *RoSnapshots) IndicesMax() uint64 { return s.idxMax.Load() } func (s *RoSnapshots) SegmentsMax() uint64 { return s.segmentsMax.Load() } +func (s *RoSnapshots) SegmentsMin() uint64 { return s.segmentsMin.Load() } +func (s *RoSnapshots) SetSegmentsMin(min uint64) { s.segmentsMin.Store(min) } func (s *RoSnapshots) BlocksAvailable() uint64 { return cmp.Min(s.segmentsMax.Load(), s.idxMax.Load()) } -func (s *RoSnapshots) LogStat() { +func (s *RoSnapshots) LogStat(label string) { var m runtime.MemStats dbg.ReadMemStats(&m) - s.logger.Info("[snapshots] Blocks Stat", + s.logger.Info(fmt.Sprintf("[snapshots:%s] Blocks Stat", label), "blocks", fmt.Sprintf("%dk", (s.BlocksAvailable()+1)/1000), "indices", fmt.Sprintf("%dk", (s.IndicesMax()+1)/1000), "alloc", common2.ByteCount(m.Alloc), "sys", common2.ByteCount(m.Sys)) @@ -517,6 +574,29 @@ func (s *RoSnapshots) Files() (list []string) { return list } +func (s *RoSnapshots) OpenFiles() (list []string) { + s.Headers.lock.RLock() + defer s.Headers.lock.RUnlock() + s.Bodies.lock.RLock() + defer s.Bodies.lock.RUnlock() + s.Txs.lock.RLock() + defer s.Txs.lock.RUnlock() + + for _, header := range s.Headers.segments { + list = append(list, header.openFiles()...) + } + + for _, body := range s.Bodies.segments { + list = append(list, body.openFiles()...) + } + + for _, txs := range s.Txs.segments { + list = append(list, txs.openFiles()...) 
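The segment reopen paths above now thread an explicit snapshot version into snaptype.SegmentFileName and snaptype.IdxFileName. As a rough sketch of what such a versioned name could look like, assuming the familiar v1-000000-000500-headers.seg layout (the helper's actual body is not shown in this diff):

```go
// Illustrative sketch only, not code from this patch: composing a versioned
// segment name from the new (version, from, to, type) arguments, assuming the
// "v1-000000-000500-headers.seg" style that erigon snapshot files use.
package main

import "fmt"

func segmentFileName(version uint8, from, to uint64, typ string) string {
	// block bounds are written in thousands of blocks
	return fmt.Sprintf("v%d-%06d-%06d-%s.seg", version, from/1_000, to/1_000, typ)
}

func main() {
	fmt.Println(segmentFileName(1, 0, 500_000, "headers")) // v1-000000-000500-headers.seg
	fmt.Println(segmentFileName(1, 500_000, 1_000_000, "bodies"))
}
```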
+ } + + return list +} + // ReopenList stops on optimistic=false, continue opening files on optimistic=true func (s *RoSnapshots) ReopenList(fileNames []string, optimistic bool) error { return s.rebuildSegments(fileNames, true, optimistic) @@ -560,7 +640,7 @@ Loop: } } if !exists { - sn = &HeaderSegment{Range: Range{f.From, f.To}} + sn = &HeaderSegment{version: f.Version, Range: Range{f.From, f.To}} } if open { @@ -606,7 +686,7 @@ Loop: } } if !exists { - sn = &BodySegment{Range: Range{f.From, f.To}} + sn = &BodySegment{version: f.Version, Range: Range{f.From, f.To}} } if open { @@ -649,7 +729,7 @@ Loop: } } if !exists { - sn = &TxnSegment{Range: Range{f.From, f.To}} + sn = &TxnSegment{version: f.Version, Range: Range{f.From, f.To}} } if open { @@ -719,7 +799,7 @@ func (s *RoSnapshots) ReopenFolder() error { } func (s *RoSnapshots) ReopenSegments(types []snaptype.Type) error { - files, _, err := segments(s.dir, func(dir string, in []snaptype.FileInfo) (res []snaptype.FileInfo) { + files, _, err := segments(s.dir, s.version, 0, func(dir string, in []snaptype.FileInfo) (res []snaptype.FileInfo) { return typeOfSegmentsMustExist(dir, in, types) }) @@ -914,7 +994,7 @@ func buildIdx(ctx context.Context, sn snaptype.FileInfo, chainConfig *chain.Conf //log.Info("[snapshots] build idx", "file", fName) switch sn.T { case snaptype.Headers: - if err := HeadersIdx(ctx, sn.Path, sn.From, tmpDir, p, lvl, logger); err != nil { + if err := HeadersIdx(ctx, sn.Path, sn.Version, sn.From, tmpDir, p, lvl, logger); err != nil { return err } case snaptype.Bodies: @@ -923,17 +1003,17 @@ func buildIdx(ctx context.Context, sn snaptype.FileInfo, chainConfig *chain.Conf } case snaptype.Transactions: dir, _ := filepath.Split(sn.Path) - if err := TransactionsIdx(ctx, chainConfig, sn.From, sn.To, dir, tmpDir, p, lvl, logger); err != nil { + if err := TransactionsIdx(ctx, chainConfig, sn.Version, sn.From, sn.To, dir, tmpDir, p, lvl, logger); err != nil { return err } case snaptype.BorEvents: dir, _ := filepath.Split(sn.Path) - if err := BorEventsIdx(ctx, sn.Path, sn.From, sn.To, dir, tmpDir, p, lvl, logger); err != nil { + if err := BorEventsIdx(ctx, sn.Path, sn.Version, sn.From, sn.To, dir, tmpDir, p, lvl, logger); err != nil { return err } case snaptype.BorSpans: dir, _ := filepath.Split(sn.Path) - if err := BorSpansIdx(ctx, sn.Path, sn.From, sn.To, dir, tmpDir, p, lvl, logger); err != nil { + if err := BorSpansIdx(ctx, sn.Path, sn.Version, sn.From, sn.To, dir, tmpDir, p, lvl, logger); err != nil { return err } } @@ -941,11 +1021,11 @@ func buildIdx(ctx context.Context, sn snaptype.FileInfo, chainConfig *chain.Conf return nil } -func BuildMissedIndices(logPrefix string, ctx context.Context, dirs datadir.Dirs, chainConfig *chain.Config, workers int, logger log.Logger) error { +func BuildMissedIndices(logPrefix string, ctx context.Context, dirs datadir.Dirs, version uint8, minIndex uint64, chainConfig *chain.Config, workers int, logger log.Logger) error { dir, tmpDir := dirs.Snap, dirs.Tmp //log.Log(lvl, "[snapshots] Build indices", "from", min) - segments, _, err := Segments(dir) + segments, _, err := Segments(dir, version, minIndex) if err != nil { return err } @@ -1008,10 +1088,10 @@ func BuildMissedIndices(logPrefix string, ctx context.Context, dirs datadir.Dirs } } -func BuildBorMissedIndices(logPrefix string, ctx context.Context, dirs datadir.Dirs, chainConfig *chain.Config, workers int, logger log.Logger) error { +func BuildBorMissedIndices(logPrefix string, ctx context.Context, dirs datadir.Dirs, version uint8, 
minIndex uint64, chainConfig *chain.Config, workers int, logger log.Logger) error { dir, tmpDir := dirs.Snap, dirs.Tmp - segments, _, err := BorSegments(dir) + segments, _, err := BorSegments(dir, version, minIndex) if err != nil { return err } @@ -1020,7 +1100,7 @@ func BuildBorMissedIndices(logPrefix string, ctx context.Context, dirs datadir.D g, gCtx := errgroup.WithContext(ctx) g.SetLimit(workers) - for _, t := range []snaptype.Type{snaptype.BorEvents, snaptype.BorSpans} { + for _, t := range snaptype.BorSnapshotTypes { for _, segment := range segments { if segment.T != t { continue @@ -1085,8 +1165,8 @@ func sendDiagnostics(startIndexingTime time.Time, indexPercent map[string]int, a }) } -func noGaps(in []snaptype.FileInfo) (out []snaptype.FileInfo, missingSnapshots []Range) { - var prevTo uint64 +func noGaps(in []snaptype.FileInfo, from uint64) (out []snaptype.FileInfo, missingSnapshots []Range) { + prevTo := from for _, f := range in { if f.To <= prevTo { continue @@ -1108,7 +1188,7 @@ MainLoop: continue } for _, t := range types { - p := filepath.Join(dir, snaptype.SegmentFileName(f.From, f.To, t)) + p := filepath.Join(dir, snaptype.SegmentFileName(f.Version, f.From, f.To, t)) if !dir2.FileExist(p) { continue MainLoop } @@ -1151,8 +1231,8 @@ func noOverlaps(in []snaptype.FileInfo) (res []snaptype.FileInfo) { return res } -func SegmentsCaplin(dir string) (res []snaptype.FileInfo, missingSnapshots []Range, err error) { - list, err := snaptype.Segments(dir) +func SegmentsCaplin(dir string, version uint8, minBlock uint64) (res []snaptype.FileInfo, missingSnapshots []Range, err error) { + list, err := snaptype.Segments(dir, version) if err != nil { return nil, missingSnapshots, err } @@ -1166,19 +1246,19 @@ func SegmentsCaplin(dir string) (res []snaptype.FileInfo, missingSnapshots []Ran } l = append(l, f) } - l, m = noGaps(noOverlaps(l)) + l, m = noGaps(noOverlaps(l), minBlock) res = append(res, l...) missingSnapshots = append(missingSnapshots, m...) } return res, missingSnapshots, nil } -func Segments(dir string) (res []snaptype.FileInfo, missingSnapshots []Range, err error) { - return segments(dir, allTypeOfSegmentsMustExist) +func Segments(dir string, version uint8, minBlock uint64) (res []snaptype.FileInfo, missingSnapshots []Range, err error) { + return segments(dir, version, minBlock, allTypeOfSegmentsMustExist) } -func segments(dir string, segmentsTypeCheck func(dir string, in []snaptype.FileInfo) []snaptype.FileInfo) (res []snaptype.FileInfo, missingSnapshots []Range, err error) { - list, err := snaptype.Segments(dir) +func segments(dir string, version uint8, minBlock uint64, segmentsTypeCheck func(dir string, in []snaptype.FileInfo) []snaptype.FileInfo) (res []snaptype.FileInfo, missingSnapshots []Range, err error) { + list, err := snaptype.Segments(dir, version) if err != nil { return nil, missingSnapshots, err } @@ -1191,7 +1271,7 @@ func segments(dir string, segmentsTypeCheck func(dir string, in []snaptype.FileI } l = append(l, f) } - l, m = noGaps(noOverlaps(segmentsTypeCheck(dir, l))) + l, m = noGaps(noOverlaps(segmentsTypeCheck(dir, l)), minBlock) res = append(res, l...) missingSnapshots = append(missingSnapshots, m...) } @@ -1203,7 +1283,7 @@ func segments(dir string, segmentsTypeCheck func(dir string, in []snaptype.FileI } l = append(l, f) } - l, _ = noGaps(noOverlaps(segmentsTypeCheck(dir, l))) + l, _ = noGaps(noOverlaps(segmentsTypeCheck(dir, l)), minBlock) res = append(res, l...) 
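Segments, SegmentsCaplin and segments now feed a minimum block into noGaps, which seeds prevTo with it, so ranges below the pruning point are no longer reported as missing. A minimal self-contained sketch of that behaviour, using a plain Range instead of snaptype.FileInfo (a simplification, not the actual erigon implementation):

```go
// Sketch of the noGaps change: the scan starts at a caller-supplied minimum
// block instead of 0, so segments deliberately pruned below that point are
// not reported as missing ranges.
package main

import "fmt"

type Range struct{ From, To uint64 }

func noGaps(in []Range, from uint64) (out, missing []Range) {
	prevTo := from
	for _, f := range in {
		if f.To <= prevTo {
			continue // fully below the window we care about
		}
		if f.From != prevTo {
			missing = append(missing, Range{prevTo, f.From})
		}
		prevTo = f.To
		out = append(out, f)
	}
	return out, missing
}

func main() {
	segs := []Range{{1_000_000, 1_500_000}, {1_500_000, 2_000_000}}
	_, missing := noGaps(segs, 0) // reports 0-1_000_000 as missing
	fmt.Println(missing)
	_, missing = noGaps(segs, 1_000_000) // pruned prefix, nothing missing
	fmt.Println(missing)
}
```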
} { @@ -1214,7 +1294,7 @@ func segments(dir string, segmentsTypeCheck func(dir string, in []snaptype.FileI } l = append(l, f) } - l, _ = noGaps(noOverlaps(segmentsTypeCheck(dir, l))) + l, _ = noGaps(noOverlaps(segmentsTypeCheck(dir, l)), minBlock) res = append(res, l...) } @@ -1233,6 +1313,7 @@ func chooseSegmentEnd(from, to, blocksPerFile uint64) uint64 { } type BlockRetire struct { + maxScheduledBlock atomic.Uint64 working atomic.Bool needSaveFilesListInDB atomic.Bool @@ -1252,6 +1333,10 @@ func NewBlockRetire(compressWorkers int, dirs datadir.Dirs, blockReader services return &BlockRetire{workers: compressWorkers, tmpDir: dirs.Tmp, dirs: dirs, blockReader: blockReader, blockWriter: blockWriter, db: db, chainConfig: chainConfig, notifier: notifier, logger: logger} } +func (br *BlockRetire) SetWorkers(workers int) { + br.workers = workers +} + func (br *BlockRetire) IO() (services.FullBlockReader, *blockio.BlockWriter) { return br.blockReader, br.blockWriter } @@ -1316,22 +1401,23 @@ func CanDeleteTo(curBlockNum uint64, blocksInSnapshots uint64) (blockTo uint64) return cmp.Min(hardLimit, blocksInSnapshots+1) } -func (br *BlockRetire) retireBlocks(ctx context.Context, forwardProgress uint64, lvl log.Lvl, seedNewSnapshots func(downloadRequest []services.DownloadRequest) error, onDelete func(l []string) error) (bool, error) { +func (br *BlockRetire) retireBlocks(ctx context.Context, minBlockNum uint64, maxBlockNum uint64, lvl log.Lvl, seedNewSnapshots func(downloadRequest []services.DownloadRequest) error, onDelete func(l []string) error) (bool, error) { notifier, logger, blockReader, tmpDir, db, workers := br.notifier, br.logger, br.blockReader, br.tmpDir, br.db, br.workers snapshots := br.snapshots() firstTxNum := blockReader.(*BlockReader).FirstTxNumNotInSnapshots() - blockFrom, blockTo, ok := CanRetire(forwardProgress, br.blockReader.FrozenBlocks()) + blockFrom, blockTo, ok := CanRetire(maxBlockNum, minBlockNum) + if ok { logger.Log(lvl, "[snapshots] Retire Blocks", "range", fmt.Sprintf("%dk-%dk", blockFrom/1000, blockTo/1000)) // in future we will do it in background - if err := DumpBlocks(ctx, blockFrom, blockTo, snaptype.Erigon2MergeLimit, tmpDir, snapshots.Dir(), firstTxNum, db, workers, lvl, logger, blockReader); err != nil { + if err := DumpBlocks(ctx, snapshots.version, blockFrom, blockTo, snaptype.Erigon2MergeLimit, tmpDir, snapshots.Dir(), firstTxNum, db, workers, lvl, logger, blockReader); err != nil { return ok, fmt.Errorf("DumpBlocks: %w", err) } if err := snapshots.ReopenFolder(); err != nil { return ok, fmt.Errorf("reopen: %w", err) } - snapshots.LogStat() + snapshots.LogStat("retire") if notifier != nil && !reflect.ValueOf(notifier).IsNil() { // notify about new snapshots of any size notifier.OnNewSnapshot() } @@ -1375,12 +1461,16 @@ func (br *BlockRetire) PruneAncientBlocks(tx kv.RwTx, limit int) error { return err } canDeleteTo := CanDeleteTo(currentProgress, br.blockReader.FrozenBlocks()) + + br.logger.Info("[snapshots] Prune Blocks", "to", canDeleteTo, "limit", limit) if err := br.blockWriter.PruneBlocks(context.Background(), tx, canDeleteTo, limit); err != nil { return err } includeBor := br.chainConfig.Bor != nil if includeBor { canDeleteTo := CanDeleteTo(currentProgress, br.blockReader.FrozenBorBlocks()) + br.logger.Info("[snapshots] Prune Bor Blocks", "to", canDeleteTo, "limit", limit) + if err := br.blockWriter.PruneBorBlocks(context.Background(), tx, canDeleteTo, limit, bor.SpanIDAt); err != nil { return err } @@ -1388,27 +1478,47 @@ func (br *BlockRetire) 
PruneAncientBlocks(tx kv.RwTx, limit int) error { return nil } -func (br *BlockRetire) RetireBlocksInBackground(ctx context.Context, forwardProgress uint64, lvl log.Lvl, seedNewSnapshots func(downloadRequest []services.DownloadRequest) error, onDeleteSnapshots func(l []string) error) { - ok := br.working.CompareAndSwap(false, true) - if !ok { - // go-routine is still working +func (br *BlockRetire) RetireBlocksInBackground(ctx context.Context, minBlockNum uint64, maxBlockNum uint64, lvl log.Lvl, seedNewSnapshots func(downloadRequest []services.DownloadRequest) error, onDeleteSnapshots func(l []string) error) { + if maxBlockNum > br.maxScheduledBlock.Load() { + br.maxScheduledBlock.Store(maxBlockNum) + } + + if !br.working.CompareAndSwap(false, true) { return } + go func() { + defer br.working.Store(false) - if err := br.RetireBlocks(ctx, forwardProgress, lvl, seedNewSnapshots, onDeleteSnapshots); err != nil { - br.logger.Warn("[snapshots] retire blocks", "err", err) + for { + maxBlockNum := br.maxScheduledBlock.Load() + + err := br.RetireBlocks(ctx, minBlockNum, maxBlockNum, lvl, seedNewSnapshots, onDeleteSnapshots) + + if err != nil { + br.logger.Warn("[snapshots] retire blocks", "err", err) + return + } + + if maxBlockNum == br.maxScheduledBlock.Load() { + return + } } }() } -func (br *BlockRetire) RetireBlocks(ctx context.Context, forwardProgress uint64, lvl log.Lvl, seedNewSnapshots func(downloadRequest []services.DownloadRequest) error, onDeleteSnapshots func(l []string) error) (err error) { +func (br *BlockRetire) RetireBlocks(ctx context.Context, minBlockNum uint64, maxBlockNum uint64, lvl log.Lvl, seedNewSnapshots func(downloadRequest []services.DownloadRequest) error, onDeleteSnapshots func(l []string) error) (err error) { includeBor := br.chainConfig.Bor != nil + if includeBor { // "bor snaps" can be behind "block snaps", it's ok: for example because of `kill -9` in the middle of merge - for br.blockReader.FrozenBorBlocks() < br.blockReader.FrozenBlocks() { - ok, err := br.retireBorBlocks(ctx, forwardProgress, lvl, seedNewSnapshots, onDeleteSnapshots) + if frozen := br.blockReader.FrozenBlocks(); frozen > minBlockNum { + minBlockNum = frozen + } + + for br.blockReader.FrozenBorBlocks() < minBlockNum { + ok, err := br.retireBorBlocks(ctx, minBlockNum, maxBlockNum, lvl, seedNewSnapshots, onDeleteSnapshots) if err != nil { return err } @@ -1420,13 +1530,17 @@ func (br *BlockRetire) RetireBlocks(ctx context.Context, forwardProgress uint64, var ok, okBor bool for { - ok, err = br.retireBlocks(ctx, forwardProgress, lvl, seedNewSnapshots, onDeleteSnapshots) + if frozen := br.blockReader.FrozenBlocks(); frozen > minBlockNum { + minBlockNum = frozen + } + + ok, err = br.retireBlocks(ctx, minBlockNum, maxBlockNum, lvl, seedNewSnapshots, onDeleteSnapshots) if err != nil { return err } if includeBor { - okBor, err = br.retireBorBlocks(ctx, forwardProgress, lvl, seedNewSnapshots, onDeleteSnapshots) + okBor, err = br.retireBorBlocks(ctx, minBlockNum, maxBlockNum, lvl, seedNewSnapshots, onDeleteSnapshots) if err != nil { return err } @@ -1436,6 +1550,7 @@ func (br *BlockRetire) RetireBlocks(ctx context.Context, forwardProgress uint64, break } } + return nil } @@ -1456,7 +1571,7 @@ func (br *BlockRetire) buildMissedIndicesIfNeed(ctx context.Context, logPrefix s if snapshots.IndicesMax() >= snapshots.SegmentsMax() { return nil } - snapshots.LogStat() + snapshots.LogStat("missed-idx") if !snapshots.Cfg().Produce && snapshots.IndicesMax() == 0 { return fmt.Errorf("please remove --snap.stop, erigon 
can't work without creating basic indices") } @@ -1469,14 +1584,14 @@ func (br *BlockRetire) buildMissedIndicesIfNeed(ctx context.Context, logPrefix s // wait for Downloader service to download all expected snapshots indexWorkers := estimate.IndexSnapshot.Workers() - if err := BuildMissedIndices(logPrefix, ctx, br.dirs, cc, indexWorkers, br.logger); err != nil { + if err := BuildMissedIndices(logPrefix, ctx, br.dirs, snapshots.Version(), snapshots.SegmentsMin(), cc, indexWorkers, br.logger); err != nil { return fmt.Errorf("BuildMissedIndices: %w", err) } if err := snapshots.ReopenFolder(); err != nil { return err } - snapshots.LogStat() + snapshots.LogStat("missed-idx:reopen") if notifier != nil { notifier.OnNewSnapshot() } @@ -1493,7 +1608,7 @@ func (br *BlockRetire) buildBorMissedIndicesIfNeed(ctx context.Context, logPrefi return nil } - borSnapshots.LogStat() + borSnapshots.LogStat("bor:missed-idx") if !borSnapshots.Cfg().Produce && borSnapshots.IndicesMax() == 0 { return fmt.Errorf("please remove --snap.stop, erigon can't work without creating basic indices") } @@ -1506,40 +1621,40 @@ func (br *BlockRetire) buildBorMissedIndicesIfNeed(ctx context.Context, logPrefi // wait for Downloader service to download all expected snapshots indexWorkers := estimate.IndexSnapshot.Workers() - if err := BuildBorMissedIndices(logPrefix, ctx, br.dirs, cc, indexWorkers, br.logger); err != nil { + if err := BuildBorMissedIndices(logPrefix, ctx, br.dirs, borSnapshots.Version(), borSnapshots.SegmentsMin(), cc, indexWorkers, br.logger); err != nil { return fmt.Errorf("BuildBorMissedIndices: %w", err) } if err := borSnapshots.ReopenFolder(); err != nil { return err } - borSnapshots.LogStat() + borSnapshots.LogStat("bor:missed-idx:reopen") if notifier != nil { notifier.OnNewSnapshot() } return nil } -func DumpBlocks(ctx context.Context, blockFrom, blockTo, blocksPerFile uint64, tmpDir, snapDir string, firstTxNum uint64, chainDB kv.RoDB, workers int, lvl log.Lvl, logger log.Logger, blockReader services.FullBlockReader) error { +func DumpBlocks(ctx context.Context, version uint8, blockFrom, blockTo, blocksPerFile uint64, tmpDir, snapDir string, firstTxNum uint64, chainDB kv.RoDB, workers int, lvl log.Lvl, logger log.Logger, blockReader services.FullBlockReader) error { if blocksPerFile == 0 { return nil } chainConfig := fromdb.ChainConfig(chainDB) for i := blockFrom; i < blockTo; i = chooseSegmentEnd(i, blockTo, blocksPerFile) { - if err := dumpBlocksRange(ctx, i, chooseSegmentEnd(i, blockTo, blocksPerFile), tmpDir, snapDir, firstTxNum, chainDB, *chainConfig, workers, lvl, logger, blockReader); err != nil { + if err := dumpBlocksRange(ctx, version, i, chooseSegmentEnd(i, blockTo, blocksPerFile), tmpDir, snapDir, firstTxNum, chainDB, *chainConfig, workers, lvl, logger, blockReader); err != nil { return err } } return nil } -func dumpBlocksRange(ctx context.Context, blockFrom, blockTo uint64, tmpDir, snapDir string, firstTxNum uint64, chainDB kv.RoDB, chainConfig chain.Config, workers int, lvl log.Lvl, logger log.Logger, blockReader services.FullBlockReader) error { +func dumpBlocksRange(ctx context.Context, version uint8, blockFrom, blockTo uint64, tmpDir, snapDir string, firstTxNum uint64, chainDB kv.RoDB, chainConfig chain.Config, workers int, lvl log.Lvl, logger log.Logger, blockReader services.FullBlockReader) error { logEvery := time.NewTicker(20 * time.Second) defer logEvery.Stop() { - segName := snaptype.SegmentFileName(blockFrom, blockTo, snaptype.Headers) + segName := snaptype.SegmentFileName(version, 
blockFrom, blockTo, snaptype.Headers) f, _ := snaptype.ParseFileName(snapDir, segName) sn, err := compress.NewCompressor(ctx, "Snapshot Headers", f.Path, tmpDir, compress.MinPatternScore, workers, log.LvlTrace, logger) @@ -1563,7 +1678,7 @@ func dumpBlocksRange(ctx context.Context, blockFrom, blockTo uint64, tmpDir, sna } { - segName := snaptype.SegmentFileName(blockFrom, blockTo, snaptype.Bodies) + segName := snaptype.SegmentFileName(version, blockFrom, blockTo, snaptype.Bodies) f, _ := snaptype.ParseFileName(snapDir, segName) sn, err := compress.NewCompressor(ctx, "Snapshot Bodies", f.Path, tmpDir, compress.MinPatternScore, workers, log.LvlTrace, logger) @@ -1587,7 +1702,7 @@ func dumpBlocksRange(ctx context.Context, blockFrom, blockTo uint64, tmpDir, sna } { - segName := snaptype.SegmentFileName(blockFrom, blockTo, snaptype.Transactions) + segName := snaptype.SegmentFileName(version, blockFrom, blockTo, snaptype.Transactions) f, _ := snaptype.ParseFileName(snapDir, segName) sn, err := compress.NewCompressor(ctx, "Snapshot Txs", f.Path, tmpDir, compress.MinPatternScore, workers, log.LvlTrace, logger) @@ -1609,7 +1724,7 @@ func dumpBlocksRange(ctx context.Context, blockFrom, blockTo uint64, tmpDir, sna ext := filepath.Ext(fileName) logger.Log(lvl, "[snapshots] Compression start", "file", fileName[:len(fileName)-len(ext)], "workers", sn.Workers()) t := time.Now() - _, expectedCount, err = txsAmountBasedOnBodiesSnapshots(snapDir, blockFrom, blockTo) + _, expectedCount, err = txsAmountBasedOnBodiesSnapshots(snapDir, version, blockFrom, blockTo) if err != nil { return err } @@ -1632,24 +1747,24 @@ func dumpBlocksRange(ctx context.Context, blockFrom, blockTo uint64, tmpDir, sna func hasIdxFile(sn snaptype.FileInfo, logger log.Logger) bool { dir, _ := filepath.Split(sn.Path) - fName := snaptype.IdxFileName(sn.From, sn.To, sn.T.String()) + fName := snaptype.IdxFileName(sn.Version, sn.From, sn.To, sn.T.String()) var result = true switch sn.T { case snaptype.Headers, snaptype.Bodies, snaptype.BorEvents, snaptype.BorSpans, snaptype.BeaconBlocks: - idx, err := recsplit.OpenIndex(path.Join(dir, fName)) + idx, err := recsplit.OpenIndex(filepath.Join(dir, fName)) if err != nil { return false } idx.Close() case snaptype.Transactions: - idx, err := recsplit.OpenIndex(path.Join(dir, fName)) + idx, err := recsplit.OpenIndex(filepath.Join(dir, fName)) if err != nil { return false } idx.Close() - fName = snaptype.IdxFileName(sn.From, sn.To, snaptype.Transactions2Block.String()) - idx, err = recsplit.OpenIndex(path.Join(dir, fName)) + fName = snaptype.IdxFileName(sn.Version, sn.From, sn.To, snaptype.Transactions2Block.String()) + idx, err = recsplit.OpenIndex(filepath.Join(dir, fName)) if err != nil { return false } @@ -1658,6 +1773,12 @@ func hasIdxFile(sn snaptype.FileInfo, logger log.Logger) bool { return result } +var bufPool = sync.Pool{ + New: func() any { + return make([]byte, 16*4096) + }, +} + // DumpTxs - [from, to) // Format: hash[0]_1byte + sender_address_2bytes + txnRlp func DumpTxs(ctx context.Context, db kv.RoDB, blockFrom, blockTo uint64, chainConfig *chain.Config, workers int, lvl log.Lvl, logger log.Logger, collect func([]byte) error) (expectedCount int, err error) { @@ -1669,12 +1790,12 @@ func DumpTxs(ctx context.Context, db kv.RoDB, blockFrom, blockTo uint64, chainCo chainID, _ := uint256.FromBig(chainConfig.ChainID) numBuf := make([]byte, 8) - parseCtx := types2.NewTxParseContext(*chainID) - parseCtx.WithSender(false) - slot := types2.TxSlot{} - var sender [20]byte - parse := func(v, 
valueBuf []byte, senders []common2.Address, j int) ([]byte, error) { - if _, err := parseCtx.ParseTransaction(v, 0, &slot, sender[:], false /* hasEnvelope */, false /* wrappedWithBlobs */, nil); err != nil { + + parse := func(ctx *types2.TxParseContext, v, valueBuf []byte, senders []common2.Address, j int) ([]byte, error) { + var sender [20]byte + slot := types2.TxSlot{} + + if _, err := ctx.ParseTransaction(v, 0, &slot, sender[:], false /* hasEnvelope */, false /* wrappedWithBlobs */, nil); err != nil { return valueBuf, err } if len(senders) > 0 { @@ -1687,8 +1808,8 @@ func DumpTxs(ctx context.Context, db kv.RoDB, blockFrom, blockTo uint64, chainCo valueBuf = append(valueBuf, v...) return valueBuf, nil } - valueBuf := make([]byte, 16*4096) - addSystemTx := func(tx kv.Tx, txId uint64) error { + + addSystemTx := func(ctx *types2.TxParseContext, tx kv.Tx, txId uint64) error { binary.BigEndian.PutUint64(numBuf, txId) tv, err := tx.GetOne(kv.EthTx, numBuf) if err != nil { @@ -1701,8 +1822,12 @@ func DumpTxs(ctx context.Context, db kv.RoDB, blockFrom, blockTo uint64, chainCo return nil } - parseCtx.WithSender(false) - valueBuf, err = parse(tv, valueBuf, nil, 0) + ctx.WithSender(false) + + valueBuf := bufPool.Get().([]byte) + defer bufPool.Put(valueBuf) //nolint + + valueBuf, err = parse(ctx, tv, valueBuf, nil, 0) if err != nil { return err } @@ -1747,30 +1872,89 @@ func DumpTxs(ctx context.Context, db kv.RoDB, blockFrom, blockTo uint64, chainCo return false, err } - j := 0 + workers := estimate.AlmostAllCPUs() + + if workers > 3 { + workers = workers / 3 * 2 + } + + if workers > int(body.TxAmount-2) { + if int(body.TxAmount-2) > 1 { + workers = int(body.TxAmount - 2) + } else { + workers = 1 + } + } + + parsers := errgroup.Group{} + parsers.SetLimit(workers) + + valueBufs := make([][]byte, workers) + parseCtxs := make([]*types2.TxParseContext, workers) - if err := addSystemTx(tx, body.BaseTxId); err != nil { + for i := 0; i < workers; i++ { + valueBuf := bufPool.Get().([]byte) + defer bufPool.Put(valueBuf) //nolint + valueBufs[i] = valueBuf + parseCtxs[i] = types2.NewTxParseContext(*chainID) + } + + if err := addSystemTx(parseCtxs[0], tx, body.BaseTxId); err != nil { return false, err } + binary.BigEndian.PutUint64(numBuf, body.BaseTxId+1) + + collected := -1 + collectorLock := sync.Mutex{} + collections := sync.NewCond(&collectorLock) + + var j int + if err := tx.ForAmount(kv.EthTx, numBuf, body.TxAmount-2, func(_, tv []byte) error { - parseCtx.WithSender(len(senders) == 0) - valueBuf, err = parse(tv, valueBuf, senders, j) - if err != nil { - return fmt.Errorf("%w, block: %d", err, blockNum) - } - // first tx byte => sender adress => tx rlp - if err := collect(valueBuf); err != nil { - return err - } + tx := j j++ + parsers.Go(func() error { + parseCtx := parseCtxs[tx%workers] + + parseCtx.WithSender(len(senders) == 0) + parseCtx.WithAllowPreEip2s(blockNum <= chainConfig.HomesteadBlock.Uint64()) + + valueBuf, err := parse(parseCtx, tv, valueBufs[tx%workers], senders, tx) + + if err != nil { + return fmt.Errorf("%w, block: %d", err, blockNum) + } + + collectorLock.Lock() + defer collectorLock.Unlock() + + for collected < tx-1 { + collections.Wait() + } + + // first tx byte => sender adress => tx rlp + if err := collect(valueBuf); err != nil { + return err + } + + collected = tx + collections.Broadcast() + + return nil + }) + return nil }); err != nil { return false, fmt.Errorf("ForAmount: %w", err) } - if err := addSystemTx(tx, body.BaseTxId+uint64(body.TxAmount)-1); err != nil { + if err := 
parsers.Wait(); err != nil { + return false, fmt.Errorf("ForAmount parser: %w", err) + } + + if err := addSystemTx(parseCtxs[0], tx, body.BaseTxId+uint64(body.TxAmount)-1); err != nil { return false, err } @@ -1913,8 +2097,8 @@ func DumpBodies(ctx context.Context, db kv.RoDB, blockFrom, blockTo uint64, firs var EmptyTxHash = common2.Hash{} -func txsAmountBasedOnBodiesSnapshots(snapDir string, blockFrom, blockTo uint64) (firstTxID uint64, expectedCount int, err error) { - bodySegmentPath := filepath.Join(snapDir, snaptype.SegmentFileName(blockFrom, blockTo, snaptype.Bodies)) +func txsAmountBasedOnBodiesSnapshots(snapDir string, version uint8, blockFrom, blockTo uint64) (firstTxID uint64, expectedCount int, err error) { + bodySegmentPath := filepath.Join(snapDir, snaptype.SegmentFileName(version, blockFrom, blockTo, snaptype.Bodies)) bodiesSegment, err := compress.NewDecompressor(bodySegmentPath) if err != nil { return @@ -1950,25 +2134,25 @@ func txsAmountBasedOnBodiesSnapshots(snapDir string, blockFrom, blockTo uint64) return } -func TransactionsIdx(ctx context.Context, chainConfig *chain.Config, blockFrom, blockTo uint64, snapDir string, tmpDir string, p *background.Progress, lvl log.Lvl, logger log.Logger) (err error) { +func TransactionsIdx(ctx context.Context, chainConfig *chain.Config, version uint8, blockFrom, blockTo uint64, snapDir string, tmpDir string, p *background.Progress, lvl log.Lvl, logger log.Logger) (err error) { defer func() { if rec := recover(); rec != nil { err = fmt.Errorf("TransactionsIdx: at=%d-%d, %v, %s", blockFrom, blockTo, rec, dbg.Stack()) } }() firstBlockNum := blockFrom - firstTxID, expectedCount, err := txsAmountBasedOnBodiesSnapshots(snapDir, blockFrom, blockTo) + firstTxID, expectedCount, err := txsAmountBasedOnBodiesSnapshots(snapDir, version, blockFrom, blockTo) if err != nil { return err } - bodySegmentPath := filepath.Join(snapDir, snaptype.SegmentFileName(blockFrom, blockTo, snaptype.Bodies)) + bodySegmentPath := filepath.Join(snapDir, snaptype.SegmentFileName(version, blockFrom, blockTo, snaptype.Bodies)) bodiesSegment, err := compress.NewDecompressor(bodySegmentPath) if err != nil { return } defer bodiesSegment.Close() - segFileName := snaptype.SegmentFileName(blockFrom, blockTo, snaptype.Transactions) + segFileName := snaptype.SegmentFileName(version, blockFrom, blockTo, snaptype.Transactions) segmentFilePath := filepath.Join(snapDir, segFileName) d, err := compress.NewDecompressor(segmentFilePath) if err != nil { @@ -1978,8 +2162,11 @@ func TransactionsIdx(ctx context.Context, chainConfig *chain.Config, blockFrom, if d.Count() != expectedCount { return fmt.Errorf("TransactionsIdx: at=%d-%d, pre index building, expect: %d, got %d", blockFrom, blockTo, expectedCount, d.Count()) } - p.Name.Store(&segFileName) - p.Total.Store(uint64(d.Count() * 2)) + + if p != nil { + p.Name.Store(&segFileName) + p.Total.Store(uint64(d.Count() * 2)) + } txnHashIdx, err := recsplit.NewRecSplit(recsplit.RecSplitArgs{ KeyCount: d.Count(), @@ -1987,7 +2174,7 @@ func TransactionsIdx(ctx context.Context, chainConfig *chain.Config, blockFrom, BucketSize: 2000, LeafSize: 8, TmpDir: tmpDir, - IndexFile: filepath.Join(snapDir, snaptype.IdxFileName(blockFrom, blockTo, snaptype.Transactions.String())), + IndexFile: filepath.Join(snapDir, snaptype.IdxFileName(version, blockFrom, blockTo, snaptype.Transactions.String())), BaseDataID: firstTxID, EtlBufLimit: etl.BufferOptimalSize / 2, }, logger) @@ -2001,7 +2188,7 @@ func TransactionsIdx(ctx context.Context, chainConfig 
*chain.Config, blockFrom, BucketSize: 2000, LeafSize: 8, TmpDir: tmpDir, - IndexFile: filepath.Join(snapDir, snaptype.IdxFileName(blockFrom, blockTo, snaptype.Transactions2Block.String())), + IndexFile: filepath.Join(snapDir, snaptype.IdxFileName(version, blockFrom, blockTo, snaptype.Transactions2Block.String())), BaseDataID: firstBlockNum, EtlBufLimit: etl.BufferOptimalSize / 2, }, logger) @@ -2033,7 +2220,10 @@ RETRY: } for g.HasNext() { - p.Processed.Add(1) + if p != nil { + p.Processed.Add(1) + } + word, nextPos = g.Next(word[:0]) select { case <-ctx.Done(): @@ -2102,7 +2292,7 @@ RETRY: } // HeadersIdx - headerHash -> offset (analog of kv.HeaderNumber) -func HeadersIdx(ctx context.Context, segmentFilePath string, firstBlockNumInSegment uint64, tmpDir string, p *background.Progress, lvl log.Lvl, logger log.Logger) (err error) { +func HeadersIdx(ctx context.Context, segmentFilePath string, version uint8, firstBlockNumInSegment uint64, tmpDir string, p *background.Progress, lvl log.Lvl, logger log.Logger) (err error) { defer func() { if rec := recover(); rec != nil { _, fName := filepath.Split(segmentFilePath) @@ -2129,6 +2319,7 @@ func HeadersIdx(ctx context.Context, segmentFilePath string, firstBlockNumInSegm if p != nil { p.Processed.Add(1) } + headerRlp := word[1:] hasher.Reset() hasher.Write(headerRlp) @@ -2159,12 +2350,16 @@ func BodiesIdx(ctx context.Context, segmentFilePath string, firstBlockNumInSegme } defer d.Close() - _, fname := filepath.Split(segmentFilePath) - p.Name.Store(&fname) - p.Total.Store(uint64(d.Count())) + if p != nil { + _, fname := filepath.Split(segmentFilePath) + p.Name.Store(&fname) + p.Total.Store(uint64(d.Count())) + } if err := Idx(ctx, d, firstBlockNumInSegment, tmpDir, log.LvlDebug, func(idx *recsplit.RecSplit, i, offset uint64, word []byte) error { - p.Processed.Add(1) + if p != nil { + p.Processed.Add(1) + } n := binary.PutUvarint(num, i) if err := idx.AddKey(num[:n], offset); err != nil { return err @@ -2403,7 +2598,7 @@ func (m *Merger) Merge(ctx context.Context, snapshots *RoSnapshots, mergeRanges } for _, t := range snaptype.BlockSnapshotTypes { - segName := snaptype.SegmentFileName(r.from, r.to, t) + segName := snaptype.SegmentFileName(snapshots.version, r.from, r.to, t) f, ok := snaptype.ParseFileName(snapDir, segName) if !ok { continue @@ -2422,13 +2617,15 @@ func (m *Merger) Merge(ctx context.Context, snapshots *RoSnapshots, mergeRanges if err := snapshots.ReopenFolder(); err != nil { return fmt.Errorf("ReopenSegments: %w", err) } - snapshots.LogStat() + + snapshots.LogStat("merge") if onMerge != nil { if err := onMerge(r); err != nil { return err } } + for _, t := range snaptype.BlockSnapshotTypes { if len(toMerge[t]) == 0 { continue @@ -2438,10 +2635,10 @@ func (m *Merger) Merge(ctx context.Context, snapshots *RoSnapshots, mergeRanges return err } } - m.removeOldFiles(toMerge[t], snapDir) + m.removeOldFiles(toMerge[t], snapDir, snapshots.Version()) } } - m.logger.Log(m.lvl, "[snapshots] Merge done", "from", mergeRanges[0].from) + m.logger.Log(m.lvl, "[snapshots] Merge done", "from", mergeRanges[0].from, "to", mergeRanges[0].to) return nil } @@ -2494,7 +2691,7 @@ func (m *Merger) merge(ctx context.Context, toMerge []string, targetFile string, return nil } -func (m *Merger) removeOldFiles(toDel []string, snapDir string) { +func (m *Merger) removeOldFiles(toDel []string, snapDir string, version uint8) { for _, f := range toDel { _ = os.Remove(f) _ = os.Remove(f + ".torrent") @@ -2506,7 +2703,7 @@ func (m *Merger) removeOldFiles(toDel []string, 
snapDir string) { _ = os.Remove(withoutExt + "-to-block.idx") } } - tmpFiles, err := snaptype.TmpFiles(snapDir) + tmpFiles, err := snaptype.TmpFiles(snapDir, version) if err != nil { return } diff --git a/turbo/snapshotsync/freezeblocks/block_snapshots_test.go b/turbo/snapshotsync/freezeblocks/block_snapshots_test.go index 609083d5891..2cb17f77d80 100644 --- a/turbo/snapshotsync/freezeblocks/block_snapshots_test.go +++ b/turbo/snapshotsync/freezeblocks/block_snapshots_test.go @@ -19,8 +19,8 @@ import ( "github.com/ledgerwatch/erigon/params" ) -func createTestSegmentFile(t *testing.T, from, to uint64, name snaptype.Type, dir string, logger log.Logger) { - c, err := compress.NewCompressor(context.Background(), "test", filepath.Join(dir, snaptype.SegmentFileName(from, to, name)), dir, 100, 1, log.LvlDebug, logger) +func createTestSegmentFile(t *testing.T, from, to uint64, name snaptype.Type, dir string, version uint8, logger log.Logger) { + c, err := compress.NewCompressor(context.Background(), "test", filepath.Join(dir, snaptype.SegmentFileName(version, from, to, name)), dir, 100, 1, log.LvlDebug, logger) require.NoError(t, err) defer c.Close() c.DisableFsync() @@ -32,7 +32,7 @@ func createTestSegmentFile(t *testing.T, from, to uint64, name snaptype.Type, di KeyCount: 1, BucketSize: 10, TmpDir: dir, - IndexFile: filepath.Join(dir, snaptype.IdxFileName(from, to, name.String())), + IndexFile: filepath.Join(dir, snaptype.IdxFileName(1, from, to, name.String())), LeafSize: 8, }, logger) require.NoError(t, err) @@ -47,7 +47,7 @@ func createTestSegmentFile(t *testing.T, from, to uint64, name snaptype.Type, di KeyCount: 1, BucketSize: 10, TmpDir: dir, - IndexFile: filepath.Join(dir, snaptype.IdxFileName(from, to, snaptype.Transactions2Block.String())), + IndexFile: filepath.Join(dir, snaptype.IdxFileName(1, from, to, snaptype.Transactions2Block.String())), LeafSize: 8, }, logger) require.NoError(t, err) @@ -94,7 +94,7 @@ func TestMergeSnapshots(t *testing.T) { dir, require := t.TempDir(), require.New(t) createFile := func(from, to uint64) { for _, snT := range snaptype.BlockSnapshotTypes { - createTestSegmentFile(t, from, to, snT, dir, logger) + createTestSegmentFile(t, from, to, snT, dir, 1, logger) } } @@ -102,7 +102,7 @@ func TestMergeSnapshots(t *testing.T) { for i := uint64(0); i < N; i++ { createFile(i*10_000, (i+1)*10_000) } - s := NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, logger) + s := NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, dir, 1, logger) defer s.Close() require.NoError(s.ReopenFolder()) { @@ -114,7 +114,7 @@ func TestMergeSnapshots(t *testing.T) { require.NoError(err) } - expectedFileName := snaptype.SegmentFileName(100_000, 200_000, snaptype.Transactions) + expectedFileName := snaptype.SegmentFileName(1, 100_000, 200_000, snaptype.Transactions) d, err := compress.NewDecompressor(filepath.Join(dir, expectedFileName)) require.NoError(err) defer d.Close() @@ -130,7 +130,7 @@ func TestMergeSnapshots(t *testing.T) { require.NoError(err) } - expectedFileName = snaptype.SegmentFileName(600_000, 700_000, snaptype.Transactions) + expectedFileName = snaptype.SegmentFileName(1, 600_000, 700_000, snaptype.Transactions) d, err = compress.NewDecompressor(filepath.Join(dir, expectedFileName)) require.NoError(err) defer d.Close() @@ -160,11 +160,11 @@ func TestCanRetire(t *testing.T) { func TestOpenAllSnapshot(t *testing.T) { logger := log.New() dir, require := t.TempDir(), require.New(t) - chainSnapshotCfg := snapcfg.KnownCfg(networkname.MainnetChainName) + 
chainSnapshotCfg := snapcfg.KnownCfg(networkname.MainnetChainName, 0) chainSnapshotCfg.ExpectBlocks = math.MaxUint64 cfg := ethconfig.BlocksFreezing{Enabled: true} - createFile := func(from, to uint64, name snaptype.Type) { createTestSegmentFile(t, from, to, name, dir, logger) } - s := NewRoSnapshots(cfg, dir, logger) + createFile := func(from, to uint64, name snaptype.Type) { createTestSegmentFile(t, from, to, name, dir, 1, logger) } + s := NewRoSnapshots(cfg, dir, 1, logger) defer s.Close() err := s.ReopenFolder() require.NoError(err) @@ -172,14 +172,14 @@ func TestOpenAllSnapshot(t *testing.T) { s.Close() createFile(500_000, 1_000_000, snaptype.Bodies) - s = NewRoSnapshots(cfg, dir, logger) + s = NewRoSnapshots(cfg, dir, 1, logger) defer s.Close() require.Equal(0, len(s.Bodies.segments)) //because, no headers and transactions snapshot files are created s.Close() createFile(500_000, 1_000_000, snaptype.Headers) createFile(500_000, 1_000_000, snaptype.Transactions) - s = NewRoSnapshots(cfg, dir, logger) + s = NewRoSnapshots(cfg, dir, 1, logger) err = s.ReopenFolder() require.NoError(err) require.Equal(0, len(s.Headers.segments)) @@ -188,7 +188,7 @@ func TestOpenAllSnapshot(t *testing.T) { createFile(0, 500_000, snaptype.Bodies) createFile(0, 500_000, snaptype.Headers) createFile(0, 500_000, snaptype.Transactions) - s = NewRoSnapshots(cfg, dir, logger) + s = NewRoSnapshots(cfg, dir, 1, logger) defer s.Close() err = s.ReopenFolder() @@ -212,7 +212,7 @@ func TestOpenAllSnapshot(t *testing.T) { // Erigon may create new snapshots by itself - with high bigger than hardcoded ExpectedBlocks // ExpectedBlocks - says only how much block must come from Torrent chainSnapshotCfg.ExpectBlocks = 500_000 - 1 - s = NewRoSnapshots(cfg, dir, logger) + s = NewRoSnapshots(cfg, dir, 1, logger) err = s.ReopenFolder() require.NoError(err) defer s.Close() @@ -222,7 +222,7 @@ func TestOpenAllSnapshot(t *testing.T) { createFile(500_000, 900_000, snaptype.Bodies) createFile(500_000, 900_000, snaptype.Transactions) chainSnapshotCfg.ExpectBlocks = math.MaxUint64 - s = NewRoSnapshots(cfg, dir, logger) + s = NewRoSnapshots(cfg, dir, 1, logger) defer s.Close() err = s.ReopenFolder() require.NoError(err) diff --git a/turbo/snapshotsync/freezeblocks/bor_snapshots.go b/turbo/snapshotsync/freezeblocks/bor_snapshots.go index 02e28b955cb..0c203d9fa7d 100644 --- a/turbo/snapshotsync/freezeblocks/bor_snapshots.go +++ b/turbo/snapshotsync/freezeblocks/bor_snapshots.go @@ -7,7 +7,6 @@ import ( "errors" "fmt" "os" - "path" "path/filepath" "reflect" "runtime" @@ -42,6 +41,7 @@ type BorEventSegment struct { seg *compress.Decompressor // value: event_rlp IdxBorTxnHash *recsplit.Index // bor_transaction_hash -> bor_event_segment_offset Range + version uint8 } func (sn *BorEventSegment) closeIdx() { @@ -62,8 +62,8 @@ func (sn *BorEventSegment) close() { } func (sn *BorEventSegment) reopenSeg(dir string) (err error) { sn.closeSeg() - fileName := snaptype.SegmentFileName(sn.from, sn.to, snaptype.BorEvents) - sn.seg, err = compress.NewDecompressor(path.Join(dir, fileName)) + fileName := snaptype.SegmentFileName(sn.version, sn.from, sn.to, snaptype.BorEvents) + sn.seg, err = compress.NewDecompressor(filepath.Join(dir, fileName)) if err != nil { return fmt.Errorf("%w, fileName: %s", err, fileName) } @@ -75,8 +75,8 @@ func (sn *BorEventSegment) reopenIdx(dir string) (err error) { return nil } - fileName := snaptype.IdxFileName(sn.from, sn.to, snaptype.BorEvents.String()) - sn.IdxBorTxnHash, err = recsplit.OpenIndex(path.Join(dir, fileName)) + 
fileName := snaptype.IdxFileName(sn.version, sn.from, sn.to, snaptype.BorEvents.String()) + sn.IdxBorTxnHash, err = recsplit.OpenIndex(filepath.Join(dir, fileName)) if err != nil { return fmt.Errorf("%w, fileName: %s", err, fileName) } @@ -109,6 +109,7 @@ type BorSpanSegment struct { seg *compress.Decompressor // value: span_json idx *recsplit.Index // span_id -> offset Range + version uint8 } func (sn *BorSpanSegment) closeIdx() { @@ -129,8 +130,8 @@ func (sn *BorSpanSegment) close() { } func (sn *BorSpanSegment) reopenSeg(dir string) (err error) { sn.closeSeg() - fileName := snaptype.SegmentFileName(sn.from, sn.to, snaptype.BorSpans) - sn.seg, err = compress.NewDecompressor(path.Join(dir, fileName)) + fileName := snaptype.SegmentFileName(sn.version, sn.from, sn.to, snaptype.BorSpans) + sn.seg, err = compress.NewDecompressor(filepath.Join(dir, fileName)) if err != nil { return fmt.Errorf("%w, fileName: %s", err, fileName) } @@ -141,8 +142,8 @@ func (sn *BorSpanSegment) reopenIdx(dir string) (err error) { if sn.seg == nil { return nil } - fileName := snaptype.IdxFileName(sn.from, sn.to, snaptype.BorSpans.String()) - sn.idx, err = recsplit.OpenIndex(path.Join(dir, fileName)) + fileName := snaptype.IdxFileName(sn.version, sn.from, sn.to, snaptype.BorSpans.String()) + sn.idx, err = recsplit.OpenIndex(filepath.Join(dir, fileName)) if err != nil { return fmt.Errorf("%w, fileName: %s", err, fileName) } @@ -171,21 +172,21 @@ type borSpanSegments struct { segments []*BorSpanSegment } -func (br *BlockRetire) retireBorBlocks(ctx context.Context, forwardProgress uint64, lvl log.Lvl, seedNewSnapshots func(downloadRequest []services.DownloadRequest) error, onDelete func(l []string) error) (bool, error) { +func (br *BlockRetire) retireBorBlocks(ctx context.Context, minBlockNum uint64, maxBlockNum uint64, lvl log.Lvl, seedNewSnapshots func(downloadRequest []services.DownloadRequest) error, onDelete func(l []string) error) (bool, error) { chainConfig := fromdb.ChainConfig(br.db) notifier, logger, blockReader, tmpDir, db, workers := br.notifier, br.logger, br.blockReader, br.tmpDir, br.db, br.workers snapshots := br.borSnapshots() firstTxNum := blockReader.(*BlockReader).FirstTxNumNotInSnapshots() - blockFrom, blockTo, ok := CanRetire(forwardProgress, br.blockReader.FrozenBorBlocks()) + blockFrom, blockTo, ok := CanRetire(maxBlockNum, minBlockNum) if ok { logger.Log(lvl, "[bor snapshots] Retire Bor Blocks", "range", fmt.Sprintf("%dk-%dk", blockFrom/1000, blockTo/1000)) - if err := DumpBorBlocks(ctx, chainConfig, blockFrom, blockTo, snaptype.Erigon2MergeLimit, tmpDir, snapshots.Dir(), firstTxNum, db, workers, lvl, logger, blockReader); err != nil { + if err := DumpBorBlocks(ctx, chainConfig, snapshots.version, blockFrom, blockTo, snaptype.Erigon2MergeLimit, tmpDir, snapshots.Dir(), firstTxNum, db, workers, lvl, logger, blockReader); err != nil { return ok, fmt.Errorf("DumpBorBlocks: %w", err) } if err := snapshots.ReopenFolder(); err != nil { return ok, fmt.Errorf("reopen: %w", err) } - snapshots.LogStat() + snapshots.LogStat("retire") if notifier != nil && !reflect.ValueOf(notifier).IsNil() { // notify about new snapshots of any size notifier.OnNewSnapshot() } @@ -219,25 +220,25 @@ func (br *BlockRetire) retireBorBlocks(ctx context.Context, forwardProgress uint } return ok, nil } -func DumpBorBlocks(ctx context.Context, chainConfig *chain.Config, blockFrom, blockTo, blocksPerFile uint64, tmpDir, snapDir string, firstTxNum uint64, chainDB kv.RoDB, workers int, lvl log.Lvl, logger log.Logger, blockReader 
services.FullBlockReader) error { +func DumpBorBlocks(ctx context.Context, chainConfig *chain.Config, version uint8, blockFrom, blockTo, blocksPerFile uint64, tmpDir, snapDir string, firstTxNum uint64, chainDB kv.RoDB, workers int, lvl log.Lvl, logger log.Logger, blockReader services.FullBlockReader) error { if blocksPerFile == 0 { return nil } for i := blockFrom; i < blockTo; i = chooseSegmentEnd(i, blockTo, blocksPerFile) { - if err := dumpBorBlocksRange(ctx, i, chooseSegmentEnd(i, blockTo, blocksPerFile), tmpDir, snapDir, firstTxNum, chainDB, *chainConfig, workers, lvl, logger, blockReader); err != nil { + if err := dumpBorBlocksRange(ctx, version, i, chooseSegmentEnd(i, blockTo, blocksPerFile), tmpDir, snapDir, firstTxNum, chainDB, *chainConfig, workers, lvl, logger, blockReader); err != nil { return err } } return nil } -func dumpBorBlocksRange(ctx context.Context, blockFrom, blockTo uint64, tmpDir, snapDir string, firstTxNum uint64, chainDB kv.RoDB, chainConfig chain.Config, workers int, lvl log.Lvl, logger log.Logger, blockReader services.FullBlockReader) error { +func dumpBorBlocksRange(ctx context.Context, version uint8, blockFrom, blockTo uint64, tmpDir, snapDir string, firstTxNum uint64, chainDB kv.RoDB, chainConfig chain.Config, workers int, lvl log.Lvl, logger log.Logger, blockReader services.FullBlockReader) error { logEvery := time.NewTicker(20 * time.Second) defer logEvery.Stop() { - segName := snaptype.SegmentFileName(blockFrom, blockTo, snaptype.BorEvents) + segName := snaptype.SegmentFileName(version, blockFrom, blockTo, snaptype.BorEvents) f, _ := snaptype.ParseFileName(snapDir, segName) sn, err := compress.NewCompressor(ctx, "Snapshot BorEvents", f.Path, tmpDir, compress.MinPatternScore, workers, log.LvlTrace, logger) @@ -260,7 +261,7 @@ func dumpBorBlocksRange(ctx context.Context, blockFrom, blockTo uint64, tmpDir, } } { - segName := snaptype.SegmentFileName(blockFrom, blockTo, snaptype.BorSpans) + segName := snaptype.SegmentFileName(version, blockFrom, blockTo, snaptype.BorSpans) f, _ := snaptype.ParseFileName(snapDir, segName) sn, err := compress.NewCompressor(ctx, "Snapshot BorSpans", f.Path, tmpDir, compress.MinPatternScore, workers, log.LvlTrace, logger) @@ -406,7 +407,7 @@ func DumpBorSpans(ctx context.Context, db kv.RoDB, blockFrom, blockTo uint64, wo return nil } -func BorEventsIdx(ctx context.Context, segmentFilePath string, blockFrom, blockTo uint64, snapDir string, tmpDir string, p *background.Progress, lvl log.Lvl, logger log.Logger) (err error) { +func BorEventsIdx(ctx context.Context, segmentFilePath string, version uint8, blockFrom, blockTo uint64, snapDir string, tmpDir string, p *background.Progress, lvl log.Lvl, logger log.Logger) (err error) { defer func() { if rec := recover(); rec != nil { err = fmt.Errorf("BorEventsIdx: at=%d-%d, %v, %s", blockFrom, blockTo, rec, dbg.Stack()) @@ -440,7 +441,7 @@ func BorEventsIdx(ctx context.Context, segmentFilePath string, blockFrom, blockT default: } } - var idxFilePath = filepath.Join(snapDir, snaptype.IdxFileName(blockFrom, blockTo, snaptype.BorEvents.String())) + var idxFilePath = filepath.Join(snapDir, snaptype.IdxFileName(version, blockFrom, blockTo, snaptype.BorEvents.String())) rs, err := recsplit.NewRecSplit(recsplit.RecSplitArgs{ KeyCount: blockCount, @@ -492,7 +493,7 @@ RETRY: return nil } -func BorSpansIdx(ctx context.Context, segmentFilePath string, blockFrom, blockTo uint64, snapDir string, tmpDir string, p *background.Progress, lvl log.Lvl, logger log.Logger) (err error) { +func BorSpansIdx(ctx 
context.Context, segmentFilePath string, version uint8, blockFrom, blockTo uint64, snapDir string, tmpDir string, p *background.Progress, lvl log.Lvl, logger log.Logger) (err error) { defer func() { if rec := recover(); rec != nil { err = fmt.Errorf("BorSpansIdx: at=%d-%d, %v, %s", blockFrom, blockTo, rec, dbg.Stack()) @@ -505,7 +506,7 @@ func BorSpansIdx(ctx context.Context, segmentFilePath string, blockFrom, blockTo } defer d.Close() g := d.MakeGetter() - var idxFilePath = filepath.Join(snapDir, snaptype.IdxFileName(blockFrom, blockTo, snaptype.BorSpans.String())) + var idxFilePath = filepath.Join(snapDir, snaptype.IdxFileName(version, blockFrom, blockTo, snaptype.BorSpans.String())) baseSpanId := bor.SpanIDAt(blockFrom) @@ -566,6 +567,9 @@ type BorRoSnapshots struct { idxMax atomic.Uint64 // all types of .idx files are available - up to this number cfg ethconfig.BlocksFreezing logger log.Logger + version uint8 + + segmentsMin atomic.Uint64 } // NewBorRoSnapshots - opens all bor snapshots. But to simplify everything: @@ -573,30 +577,33 @@ type BorRoSnapshots struct { // - all snapshots of given blocks range must exist - to make this blocks range available // - gaps are not allowed // - segment have [from:to) semantic -func NewBorRoSnapshots(cfg ethconfig.BlocksFreezing, snapDir string, logger log.Logger) *BorRoSnapshots { - return &BorRoSnapshots{dir: snapDir, cfg: cfg, Events: &borEventSegments{}, Spans: &borSpanSegments{}, logger: logger} +func NewBorRoSnapshots(cfg ethconfig.BlocksFreezing, snapDir string, version uint8, logger log.Logger) *BorRoSnapshots { + return &BorRoSnapshots{dir: snapDir, version: version, cfg: cfg, Events: &borEventSegments{}, Spans: &borSpanSegments{}, logger: logger} } +func (s *BorRoSnapshots) Version() uint8 { return s.version } func (s *BorRoSnapshots) Cfg() ethconfig.BlocksFreezing { return s.cfg } func (s *BorRoSnapshots) Dir() string { return s.dir } func (s *BorRoSnapshots) SegmentsReady() bool { return s.segmentsReady.Load() } func (s *BorRoSnapshots) IndicesReady() bool { return s.indicesReady.Load() } func (s *BorRoSnapshots) IndicesMax() uint64 { return s.idxMax.Load() } func (s *BorRoSnapshots) SegmentsMax() uint64 { return s.segmentsMax.Load() } +func (s *BorRoSnapshots) SegmentsMin() uint64 { return s.segmentsMin.Load() } +func (s *BorRoSnapshots) SetSegmentsMin(min uint64) { s.segmentsMin.Store(min) } func (s *BorRoSnapshots) BlocksAvailable() uint64 { return cmp.Min(s.segmentsMax.Load(), s.idxMax.Load()) } -func (s *BorRoSnapshots) LogStat() { +func (s *BorRoSnapshots) LogStat(label string) { var m runtime.MemStats dbg.ReadMemStats(&m) - s.logger.Info("[bor snapshots] Blocks Stat", + s.logger.Info(fmt.Sprintf("[bor snapshots:%s] Blocks Stat", label), "blocks", fmt.Sprintf("%dk", (s.SegmentsMax()+1)/1000), "indices", fmt.Sprintf("%dk", (s.IndicesMax()+1)/1000), "alloc", common2.ByteCount(m.Alloc), "sys", common2.ByteCount(m.Sys)) } -func BorSegments(dir string) (res []snaptype.FileInfo, missingSnapshots []Range, err error) { - list, err := snaptype.Segments(dir) +func BorSegments(dir string, version uint8, min uint64) (res []snaptype.FileInfo, missingSnapshots []Range, err error) { + list, err := snaptype.Segments(dir, version) if err != nil { return nil, missingSnapshots, err } @@ -609,7 +616,7 @@ func BorSegments(dir string) (res []snaptype.FileInfo, missingSnapshots []Range, } l = append(l, f) } - l, m = noGaps(noOverlaps(borSegmentsMustExist(dir, l))) + l, m = noGaps(noOverlaps(borSegmentsMustExist(dir, l)), min) res = append(res, l...) 
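Like RoSnapshots, BorRoSnapshots now carries a version and a minimum segment. A hedged usage sketch of how a pruning caller (such as the uploader) might wire these new parameters; the import paths are assumptions based on the file locations in this diff, while the constructor, SetSegmentsMin, ReopenFolder and Close are the APIs shown above:

```go
// Usage sketch only, not part of this patch.
package example

import (
	"github.com/ledgerwatch/erigon/eth/ethconfig"
	"github.com/ledgerwatch/erigon/turbo/snapshotsync/freezeblocks"
	"github.com/ledgerwatch/log/v3"
)

// openPrunedBorSnapshots opens bor snapshots for a datadir where everything
// below prunedTo has already been removed, so the pruned range must not be
// treated as a gap when the folder is (re)opened.
func openPrunedBorSnapshots(snapDir string, prunedTo uint64, logger log.Logger) (*freezeblocks.BorRoSnapshots, error) {
	s := freezeblocks.NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: true}, snapDir, 1 /* snapshot version */, logger)
	s.SetSegmentsMin(prunedTo)
	if err := s.ReopenFolder(); err != nil {
		s.Close()
		return nil, err
	}
	return s, nil
}
```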
missingSnapshots = append(missingSnapshots, m...) } @@ -621,13 +628,75 @@ func BorSegments(dir string) (res []snaptype.FileInfo, missingSnapshots []Range, } l = append(l, f) } - l, _ = noGaps(noOverlaps(borSegmentsMustExist(dir, l))) + l, _ = noGaps(noOverlaps(borSegmentsMustExist(dir, l)), min) res = append(res, l...) } return res, missingSnapshots, nil } +// this is one off code to fix an issue in 2.49.x->2.52.x which missed +// removal of intermediate segments after a merge operation +func removeBorOverlaps(dir string, version uint8, active []snaptype.FileInfo, max uint64) { + list, err := snaptype.Segments(dir, version) + + if err != nil { + return + } + + var toDel []string + l := make([]snaptype.FileInfo, 0, len(list)) + + for _, f := range list { + if !(f.T == snaptype.BorSpans || f.T == snaptype.BorEvents) { + continue + } + l = append(l, f) + } + + // added overhead to make sure we don't delete in the + // current 500k block segment + if max > 500_001 { + max -= 500_001 + } + + for _, f := range l { + if max < f.From { + continue + } + + for _, a := range active { + if a.T != snaptype.BorSpans { + continue + } + + if f.From < a.From { + continue + } + + if f.From == a.From { + if f.To < a.To { + toDel = append(toDel, f.Path) + } + + break + } + + if f.From < a.To { + toDel = append(toDel, f.Path) + break + } + } + } + + for _, f := range toDel { + _ = os.Remove(f) + ext := filepath.Ext(f) + withoutExt := f[:len(f)-len(ext)] + _ = os.Remove(withoutExt + ".idx") + } +} + func (s *BorRoSnapshots) EnsureExpectedBlocksAreAvailable(cfg *snapcfg.Cfg) error { if s.BlocksAvailable() < cfg.ExpectBlocks { return fmt.Errorf("app must wait until all expected bor snapshots are available. Expected: %d, Available: %d", cfg.ExpectBlocks, s.BlocksAvailable()) @@ -782,7 +851,7 @@ Loop: } } if !exists { - sn = &BorEventSegment{Range: Range{f.From, f.To}} + sn = &BorEventSegment{version: f.Version, Range: Range{f.From, f.To}} } if err := sn.reopenSeg(s.dir); err != nil { if errors.Is(err, os.ErrNotExist) { @@ -822,7 +891,7 @@ Loop: } } if !exists { - sn = &BorSpanSegment{Range: Range{f.From, f.To}} + sn = &BorSpanSegment{version: f.Version, Range: Range{f.From, f.To}} } if err := sn.reopenSeg(s.dir); err != nil { if errors.Is(err, os.ErrNotExist) { @@ -884,10 +953,15 @@ func (s *BorRoSnapshots) Ranges() (ranges []Range) { func (s *BorRoSnapshots) OptimisticalyReopenFolder() { _ = s.ReopenFolder() } func (s *BorRoSnapshots) OptimisticalyReopenWithDB(db kv.RoDB) { _ = s.ReopenWithDB(db) } func (s *BorRoSnapshots) ReopenFolder() error { - files, _, err := BorSegments(s.dir) + files, _, err := BorSegments(s.dir, s.version, s.segmentsMin.Load()) if err != nil { return err } + + // this is one off code to fix an issue in 2.49.x->2.52.x which missed + // removal of intermediate segments after a merge operation + removeBorOverlaps(s.dir, s.version, files, s.BlocksAvailable()) + list := make([]string, 0, len(files)) for _, f := range files { _, fName := filepath.Split(f.Path) @@ -1099,8 +1173,8 @@ func (m *BorMerger) Merge(ctx context.Context, snapshots *BorRoSnapshots, mergeR return err } - for _, t := range []snaptype.Type{snaptype.BorEvents, snaptype.BorSpans} { - segName := snaptype.SegmentFileName(r.from, r.to, t) + for _, t := range snaptype.BorSnapshotTypes { + segName := snaptype.SegmentFileName(snapshots.Version(), r.from, r.to, t) f, ok := snaptype.ParseFileName(snapDir, segName) if !ok { continue @@ -1118,20 +1192,24 @@ func (m *BorMerger) Merge(ctx context.Context, snapshots *BorRoSnapshots, mergeR 
if err := snapshots.ReopenFolder(); err != nil { return fmt.Errorf("ReopenSegments: %w", err) } - snapshots.LogStat() + snapshots.LogStat("merge") if err := onMerge(r); err != nil { return err } - for _, t := range snaptype.BlockSnapshotTypes { + + for _, t := range snaptype.BorSnapshotTypes { if len(toMerge[t]) == 0 { continue } + if err := onDelete(toMerge[t]); err != nil { return err } + } - for _, t := range []snaptype.Type{snaptype.BorEvents, snaptype.BorSpans} { - m.removeOldFiles(toMerge[t], snapDir) + time.Sleep(1 * time.Second) // I am working on a blocking API - to ensure the client does not use old snapshots - and then delete them + for _, t := range snaptype.BorSnapshotTypes { + m.removeOldFiles(toMerge[t], snapDir, snapshots.Version()) } } m.logger.Log(m.lvl, "[bor snapshots] Merge done", "from", mergeRanges[0].from, "to", mergeRanges[0].to) @@ -1181,14 +1259,14 @@ func (m *BorMerger) merge(ctx context.Context, toMerge []string, targetFile stri return nil } -func (m *BorMerger) removeOldFiles(toDel []string, snapDir string) { +func (m *BorMerger) removeOldFiles(toDel []string, snapDir string, version uint8) { for _, f := range toDel { _ = os.Remove(f) ext := filepath.Ext(f) withoutExt := f[:len(f)-len(ext)] _ = os.Remove(withoutExt + ".idx") } - tmpFiles, err := snaptype.TmpFiles(snapDir) + tmpFiles, err := snaptype.TmpFiles(snapDir, version) if err != nil { return } diff --git a/turbo/snapshotsync/freezeblocks/caplin_snapshots.go b/turbo/snapshotsync/freezeblocks/caplin_snapshots.go index 4661224dd4e..91462f5e3ad 100644 --- a/turbo/snapshotsync/freezeblocks/caplin_snapshots.go +++ b/turbo/snapshotsync/freezeblocks/caplin_snapshots.go @@ -7,7 +7,6 @@ import ( "errors" "fmt" "os" - "path" "path/filepath" "sync" "sync/atomic" @@ -33,6 +32,7 @@ type BeaconBlockSegment struct { seg *compress.Decompressor // value: chunked(ssz(SignedBeaconBlocks)) idxSlot *recsplit.Index // slot -> beacon_slot_segment_offset ranges Range + version uint8 } func (sn *BeaconBlockSegment) closeIdx() { @@ -53,8 +53,8 @@ func (sn *BeaconBlockSegment) close() { } func (sn *BeaconBlockSegment) reopenSeg(dir string) (err error) { sn.closeSeg() - fileName := snaptype.SegmentFileName(sn.ranges.from, sn.ranges.to, snaptype.BeaconBlocks) - sn.seg, err = compress.NewDecompressor(path.Join(dir, fileName)) + fileName := snaptype.SegmentFileName(sn.version, sn.ranges.from, sn.ranges.to, snaptype.BeaconBlocks) + sn.seg, err = compress.NewDecompressor(filepath.Join(dir, fileName)) if err != nil { return fmt.Errorf("%w, fileName: %s", err, fileName) } @@ -82,8 +82,8 @@ func (sn *BeaconBlockSegment) reopenIdx(dir string) (err error) { if sn.seg == nil { return nil } - fileName := snaptype.IdxFileName(sn.ranges.from, sn.ranges.to, snaptype.BeaconBlocks.String()) - sn.idxSlot, err = recsplit.OpenIndex(path.Join(dir, fileName)) + fileName := snaptype.IdxFileName(sn.version, sn.ranges.from, sn.ranges.to, snaptype.BeaconBlocks.String()) + sn.idxSlot, err = recsplit.OpenIndex(filepath.Join(dir, fileName)) if err != nil { return fmt.Errorf("%w, fileName: %s", err, fileName) } @@ -146,6 +146,9 @@ type CaplinSnapshots struct { idxMax atomic.Uint64 // all types of .idx files are available - up to this number cfg ethconfig.BlocksFreezing logger log.Logger + // allows for pruning segments - this is the min available segment + segmentsMin atomic.Uint64 + version uint8 // chain cfg beaconCfg *clparams.BeaconChainConfig } @@ -155,10 +158,11 @@ type CaplinSnapshots struct { // - all snapshots of given blocks range must exist - to make this
blocks range available // - gaps are not allowed // - segment have [from:to) semantic -func NewCaplinSnapshots(cfg ethconfig.BlocksFreezing, beaconCfg *clparams.BeaconChainConfig, snapDir string, logger log.Logger) *CaplinSnapshots { - return &CaplinSnapshots{dir: snapDir, cfg: cfg, BeaconBlocks: &beaconBlockSegments{}, logger: logger, beaconCfg: beaconCfg} +func NewCaplinSnapshots(cfg ethconfig.BlocksFreezing, beaconCfg *clparams.BeaconChainConfig, snapDir string, version uint8, logger log.Logger) *CaplinSnapshots { + return &CaplinSnapshots{dir: snapDir, version: version, cfg: cfg, BeaconBlocks: &beaconBlockSegments{}, logger: logger, beaconCfg: beaconCfg} } +func (s *CaplinSnapshots) Version() uint8 { return s.version } func (s *CaplinSnapshots) IndicesMax() uint64 { return s.idxMax.Load() } func (s *CaplinSnapshots) SegmentsMax() uint64 { return s.segmentsMax.Load() } @@ -266,7 +270,7 @@ func (s *CaplinSnapshots) idxAvailability() uint64 { } func (s *CaplinSnapshots) ReopenFolder() error { - files, _, err := SegmentsCaplin(s.dir) + files, _, err := SegmentsCaplin(s.dir, s.version, s.segmentsMin.Load()) if err != nil { return err } @@ -338,8 +342,8 @@ func (v *CaplinView) BeaconBlocksSegment(slot uint64) (*BeaconBlockSegment, bool return nil, false } -func dumpBeaconBlocksRange(ctx context.Context, db kv.RoDB, b persistence.BlockSource, fromSlot uint64, toSlot uint64, tmpDir, snapDir string, workers int, lvl log.Lvl, logger log.Logger) error { - segName := snaptype.SegmentFileName(fromSlot, toSlot, snaptype.BeaconBlocks) +func dumpBeaconBlocksRange(ctx context.Context, db kv.RoDB, b persistence.BlockSource, version uint8, fromSlot uint64, toSlot uint64, tmpDir, snapDir string, workers int, lvl log.Lvl, logger log.Logger) error { + segName := snaptype.SegmentFileName(version, fromSlot, toSlot, snaptype.BeaconBlocks) f, _ := snaptype.ParseFileName(snapDir, segName) sn, err := compress.NewCompressor(ctx, "Snapshot BeaconBlocks", f.Path, tmpDir, compress.MinPatternScore, workers, lvl, logger) @@ -397,10 +401,10 @@ func dumpBeaconBlocksRange(ctx context.Context, db kv.RoDB, b persistence.BlockS // Generate .idx file, which is the slot => offset mapping. 
p := &background.Progress{} - return BeaconBlocksIdx(ctx, f, path.Join(snapDir, segName), fromSlot, toSlot, tmpDir, p, lvl, logger) + return BeaconBlocksIdx(ctx, f, filepath.Join(snapDir, segName), fromSlot, toSlot, tmpDir, p, lvl, logger) } -func DumpBeaconBlocks(ctx context.Context, db kv.RoDB, b persistence.BlockSource, fromSlot, toSlot, blocksPerFile uint64, tmpDir, snapDir string, workers int, lvl log.Lvl, logger log.Logger) error { +func DumpBeaconBlocks(ctx context.Context, db kv.RoDB, b persistence.BlockSource, version uint8, fromSlot, toSlot, blocksPerFile uint64, tmpDir, snapDir string, workers int, lvl log.Lvl, logger log.Logger) error { if blocksPerFile == 0 { return nil } @@ -411,7 +415,7 @@ func DumpBeaconBlocks(ctx context.Context, db kv.RoDB, b persistence.BlockSource } to := chooseSegmentEnd(i, toSlot, blocksPerFile) logger.Log(lvl, "Dumping beacon blocks", "from", i, "to", to) - if err := dumpBeaconBlocksRange(ctx, db, b, i, to, tmpDir, snapDir, workers, lvl, logger); err != nil { + if err := dumpBeaconBlocksRange(ctx, db, b, version, i, to, tmpDir, snapDir, workers, lvl, logger); err != nil { return err } } @@ -424,7 +428,7 @@ func (s *CaplinSnapshots) BuildMissingIndices(ctx context.Context, logger log.Lo // } // wait for Downloader service to download all expected snapshots - segments, _, err := SegmentsCaplin(s.dir) + segments, _, err := SegmentsCaplin(s.dir, s.version, 0) if err != nil { return err } diff --git a/turbo/snapshotsync/freezeblocks/dump_test.go b/turbo/snapshotsync/freezeblocks/dump_test.go index 5136f711534..734e7a4728a 100644 --- a/turbo/snapshotsync/freezeblocks/dump_test.go +++ b/turbo/snapshotsync/freezeblocks/dump_test.go @@ -236,10 +236,10 @@ func TestDump(t *testing.T) { logger := log.New() tmpDir, snapDir := t.TempDir(), t.TempDir() - snConfig := snapcfg.KnownCfg(networkname.MainnetChainName) + snConfig := snapcfg.KnownCfg(networkname.MainnetChainName, 0) snConfig.ExpectBlocks = math.MaxUint64 - err := freezeblocks.DumpBlocks(m.Ctx, 0, uint64(test.chainSize), uint64(test.chainSize), tmpDir, snapDir, 0, m.DB, 1, log.LvlInfo, logger, m.BlockReader) + err := freezeblocks.DumpBlocks(m.Ctx, 1, 0, uint64(test.chainSize), uint64(test.chainSize), tmpDir, snapDir, 0, m.DB, 1, log.LvlInfo, logger, m.BlockReader) require.NoError(err) }) } diff --git a/turbo/snapshotsync/snapshotsync.go b/turbo/snapshotsync/snapshotsync.go index 0c9d7663db5..b55104250c3 100644 --- a/turbo/snapshotsync/snapshotsync.go +++ b/turbo/snapshotsync/snapshotsync.go @@ -67,7 +67,7 @@ func RequestSnapshotsDownload(ctx context.Context, downloadRequest []services.Do // WaitForDownloader - wait for Downloader service to download all expected snapshots // for MVP we sync with Downloader only once, in future will send new snapshots also -func WaitForDownloader(logPrefix string, ctx context.Context, histV3 bool, caplin CaplinMode, agg *state.AggregatorV3, tx kv.RwTx, blockReader services.FullBlockReader, cc *chain.Config, snapshotDownloader proto_downloader.DownloaderClient) error { +func WaitForDownloader(ctx context.Context, logPrefix string, histV3 bool, caplin CaplinMode, agg *state.AggregatorV3, tx kv.RwTx, blockReader services.FullBlockReader, cc *chain.Config, snapshotDownloader proto_downloader.DownloaderClient) error { snapshots := blockReader.Snapshots() borSnapshots := blockReader.BorSnapshots() if blockReader.FreezingCfg().NoDownloader { @@ -87,7 +87,9 @@ func WaitForDownloader(logPrefix string, ctx context.Context, histV3 bool, capli // - Erigon "download once": means 
restart/upgrade/downgrade must not download files (and will be fast) // - After "download once" - Erigon will produce and seed new files - preverifiedBlockSnapshots := snapcfg.KnownCfg(cc.ChainName).Preverified + // send all hashes to the Downloader service + snapCfg := snapcfg.KnownCfg(cc.ChainName, 0) + preverifiedBlockSnapshots := snapCfg.Preverified downloadRequest := make([]services.DownloadRequest, 0, len(preverifiedBlockSnapshots)) // build all download requests diff --git a/turbo/stages/genesis_test.go b/turbo/stages/genesis_test.go index 67e685eb366..a510d384c43 100644 --- a/turbo/stages/genesis_test.go +++ b/turbo/stages/genesis_test.go @@ -176,7 +176,7 @@ func TestSetupGenesis(t *testing.T) { t.Run(test.name, func(t *testing.T) { t.Parallel() _, db, _ := temporal.NewTestDB(t, datadir.New(tmpdir), nil) - blockReader := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", log.New()), freezeblocks.NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", log.New())) + blockReader := freezeblocks.NewBlockReader(freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", 1, log.New()), freezeblocks.NewBorRoSnapshots(ethconfig.BlocksFreezing{Enabled: false}, "", 1, log.New())) config, genesis, err := test.fn(db) // Check the return values. if !reflect.DeepEqual(err, test.wantErr) { diff --git a/turbo/stages/headerdownload/header_algos.go b/turbo/stages/headerdownload/header_algos.go index f9b64b074ea..3c6fc8fde54 100644 --- a/turbo/stages/headerdownload/header_algos.go +++ b/turbo/stages/headerdownload/header_algos.go @@ -623,12 +623,15 @@ func (hd *HeaderDownload) InsertHeader(hf FeedHeaderFunc, terminalTotalDifficult // InsertHeaders attempts to insert headers into the database, verifying them first // It returns true in the first return value if the system is "in sync" -func (hd *HeaderDownload) InsertHeaders(hf FeedHeaderFunc, terminalTotalDifficulty *big.Int, logPrefix string, logChannel <-chan time.Time, currentTime uint64) (bool, error) { +func (hd *HeaderDownload) InsertHeaders(hf FeedHeaderFunc, headerLimit uint, terminalTotalDifficulty *big.Int, logPrefix string, logChannel <-chan time.Time, currentTime uint64) (bool, error) { var more = true var err error var force bool var blocksToTTD uint64 var blockTime uint64 + + startHeight := hd.highestInDb + for more { if more, force, blocksToTTD, blockTime, err = hd.InsertHeader(hf, terminalTotalDifficulty, logPrefix, logChannel); err != nil { return false, err @@ -636,9 +639,13 @@ func (hd *HeaderDownload) InsertHeaders(hf FeedHeaderFunc, terminalTotalDifficul if force { return true, nil } + + if headerLimit > 0 && hd.highestInDb-startHeight > uint64(headerLimit) { + break + } } if blocksToTTD > 0 { - hd.logger.Info("Estimated to reaching TTD", "blocks", blocksToTTD) + hd.logger.Trace("Estimated to reaching TTD", "blocks", blocksToTTD) } hd.lock.RLock() defer hd.lock.RUnlock() diff --git a/turbo/stages/mock/mock_sentry.go b/turbo/stages/mock/mock_sentry.go index 33dbf572ea6..bc517eaaf12 100644 --- a/turbo/stages/mock/mock_sentry.go +++ b/turbo/stages/mock/mock_sentry.go @@ -51,6 +51,7 @@ import ( "github.com/ledgerwatch/erigon/eth/stagedsync" "github.com/ledgerwatch/erigon/eth/stagedsync/stages" "github.com/ledgerwatch/erigon/ethdb/prune" + "github.com/ledgerwatch/erigon/p2p" "github.com/ledgerwatch/erigon/p2p/sentry/sentry_multi_client" "github.com/ledgerwatch/erigon/params" "github.com/ledgerwatch/erigon/rlp" @@ -259,8 +260,8 @@ func MockWithEverything(tb 
testing.TB, gspec *types.Genesis, key *ecdsa.PrivateK cfg.HistoryV3 = histV3 erigonGrpcServeer := remotedbserver.NewKvServer(ctx, db, nil, nil, logger) - allSnapshots := freezeblocks.NewRoSnapshots(ethconfig.Defaults.Snapshot, dirs.Snap, logger) - allBorSnapshots := freezeblocks.NewBorRoSnapshots(ethconfig.Defaults.Snapshot, dirs.Snap, logger) + allSnapshots := freezeblocks.NewRoSnapshots(ethconfig.Defaults.Snapshot, dirs.Snap, 1, logger) + allBorSnapshots := freezeblocks.NewBorRoSnapshots(ethconfig.Defaults.Snapshot, dirs.Snap, 1, logger) mock := &MockSentry{ Ctx: ctx, cancel: ctxCancel, DB: db, agg: agg, tb: tb, @@ -411,9 +412,10 @@ func MockWithEverything(tb testing.TB, gspec *types.Genesis, key *ecdsa.PrivateK miningStatePos := stagedsync.NewProposingState(&cfg.Miner) miningStatePos.MiningConfig.Etherbase = param.SuggestedFeeRecipient proposingSync := stagedsync.New( + cfg.Sync, stagedsync.MiningStages(mock.Ctx, stagedsync.StageMiningCreateBlockCfg(mock.DB, miningStatePos, *mock.ChainConfig, mock.Engine, mock.txPoolDB, param, tmpdir, mock.BlockReader), - stagedsync.StageBorHeimdallCfg(mock.DB, snapDb, miningStatePos, *mock.ChainConfig, nil, mock.BlockReader, nil, nil, recents, signatures), + stagedsync.StageBorHeimdallCfg(mock.DB, snapDb, miningStatePos, *mock.ChainConfig, nil, mock.BlockReader, nil, nil, nil, recents, signatures), stagedsync.StageMiningExecCfg(mock.DB, miningStatePos, mock.Notifications.Events, *mock.ChainConfig, mock.Engine, &vm.Config{}, tmpdir, interrupt, param.PayloadId, mock.TxPool, mock.txPoolDB, mock.BlockReader), stagedsync.StageHashStateCfg(mock.DB, dirs, cfg.HistoryV3), stagedsync.StageTrieCfg(mock.DB, false, true, true, tmpdir, mock.BlockReader, nil, histV3, mock.agg), @@ -430,13 +432,14 @@ func MockWithEverything(tb testing.TB, gspec *types.Genesis, key *ecdsa.PrivateK blockRetire := freezeblocks.NewBlockRetire(1, dirs, mock.BlockReader, blockWriter, mock.DB, mock.ChainConfig, mock.Notifications.Events, logger) mock.Sync = stagedsync.New( + cfg.Sync, stagedsync.DefaultStages(mock.Ctx, - stagedsync.StageSnapshotsCfg(mock.DB, *mock.ChainConfig, dirs, blockRetire, snapshotsDownloader, mock.BlockReader, mock.Notifications.Events, mock.HistoryV3, mock.agg, false, nil), - stagedsync.StageHeadersCfg(mock.DB, mock.sentriesClient.Hd, mock.sentriesClient.Bd, *mock.ChainConfig, sendHeaderRequest, propagateNewBlockHashes, penalize, cfg.BatchSize, false, mock.BlockReader, blockWriter, dirs.Tmp, mock.Notifications, engine_helpers.NewForkValidatorMock(1), nil), - stagedsync.StageBorHeimdallCfg(mock.DB, snapDb, stagedsync.MiningState{}, *mock.ChainConfig, nil /* heimdallClient */, mock.BlockReader, nil, nil, recents, signatures), + stagedsync.StageSnapshotsCfg(mock.DB, *mock.ChainConfig, cfg.Sync, dirs, blockRetire, snapshotsDownloader, mock.BlockReader, mock.Notifications, mock.HistoryV3, mock.agg, false, nil), + stagedsync.StageHeadersCfg(mock.DB, mock.sentriesClient.Hd, mock.sentriesClient.Bd, *mock.ChainConfig, cfg.Sync, sendHeaderRequest, propagateNewBlockHashes, penalize, cfg.BatchSize, false, mock.BlockReader, blockWriter, dirs.Tmp, mock.Notifications, engine_helpers.NewForkValidatorMock(1), nil), + stagedsync.StageBorHeimdallCfg(mock.DB, snapDb, stagedsync.MiningState{}, *mock.ChainConfig, nil /* heimdallClient */, mock.BlockReader, nil, nil, nil, recents, signatures), stagedsync.StageBlockHashesCfg(mock.DB, mock.Dirs.Tmp, mock.ChainConfig, blockWriter), - stagedsync.StageBodiesCfg(mock.DB, mock.sentriesClient.Bd, sendBodyRequest, penalize, blockPropagator, 
cfg.Sync.BodyDownloadTimeoutSeconds, *mock.ChainConfig, mock.BlockReader, cfg.HistoryV3, blockWriter), - stagedsync.StageSendersCfg(mock.DB, mock.ChainConfig, false, dirs.Tmp, prune, mock.BlockReader, mock.sentriesClient.Hd), + stagedsync.StageBodiesCfg(mock.DB, mock.sentriesClient.Bd, sendBodyRequest, penalize, blockPropagator, cfg.Sync.BodyDownloadTimeoutSeconds, *mock.ChainConfig, mock.BlockReader, cfg.HistoryV3, blockWriter, nil), + stagedsync.StageSendersCfg(mock.DB, mock.ChainConfig, false, dirs.Tmp, prune, mock.BlockReader, mock.sentriesClient.Hd, nil), stagedsync.StageExecuteBlocksCfg( mock.DB, prune, @@ -471,9 +474,9 @@ func MockWithEverything(tb testing.TB, gspec *types.Genesis, key *ecdsa.PrivateK ) cfg.Genesis = gspec - pipelineStages := stages2.NewPipelineStages(mock.Ctx, db, &cfg, mock.sentriesClient, mock.Notifications, + pipelineStages := stages2.NewPipelineStages(mock.Ctx, db, &cfg, p2p.Config{}, mock.sentriesClient, mock.Notifications, snapshotsDownloader, mock.BlockReader, blockRetire, mock.agg, nil, forkValidator, logger, checkStateRoot) - mock.posStagedSync = stagedsync.New(pipelineStages, stagedsync.PipelineUnwindOrder, stagedsync.PipelinePruneOrder, logger) + mock.posStagedSync = stagedsync.New(cfg.Sync, pipelineStages, stagedsync.PipelineUnwindOrder, stagedsync.PipelinePruneOrder, logger) mock.Eth1ExecutionService = eth1.NewEthereumExecutionModule(mock.BlockReader, mock.DB, mock.posStagedSync, forkValidator, mock.ChainConfig, assembleBlockPOS, nil, mock.Notifications.Accumulator, mock.Notifications.StateChangesConsumer, logger, engine, histV3) @@ -494,9 +497,10 @@ func MockWithEverything(tb testing.TB, gspec *types.Genesis, key *ecdsa.PrivateK mock.PendingBlocks = miner.PendingResultCh mock.MinedBlocks = miner.MiningResultCh mock.MiningSync = stagedsync.New( + cfg.Sync, stagedsync.MiningStages(mock.Ctx, stagedsync.StageMiningCreateBlockCfg(mock.DB, miner, *mock.ChainConfig, mock.Engine, nil, nil, dirs.Tmp, mock.BlockReader), - stagedsync.StageBorHeimdallCfg(mock.DB, snapDb, miner, *mock.ChainConfig, nil /*heimdallClient*/, mock.BlockReader, nil, nil, recents, signatures), + stagedsync.StageBorHeimdallCfg(mock.DB, snapDb, miner, *mock.ChainConfig, nil /*heimdallClient*/, mock.BlockReader, nil, nil, nil, recents, signatures), stagedsync.StageMiningExecCfg(mock.DB, miner, nil, *mock.ChainConfig, mock.Engine, &vm.Config{}, dirs.Tmp, nil, 0, mock.TxPool, nil, mock.BlockReader), stagedsync.StageHashStateCfg(mock.DB, dirs, cfg.HistoryV3), stagedsync.StageTrieCfg(mock.DB, false, true, false, dirs.Tmp, mock.BlockReader, mock.sentriesClient.Hd, cfg.HistoryV3, mock.agg), diff --git a/turbo/stages/stageloop.go b/turbo/stages/stageloop.go index de8fee1d567..f8fcb5ccf85 100644 --- a/turbo/stages/stageloop.go +++ b/turbo/stages/stageloop.go @@ -146,7 +146,7 @@ func StageLoopIteration(ctx context.Context, db kv.RwDB, tx kv.RwTx, sync *stage return err } } - err = sync.Run(db, tx, initialCycle) + _, err = sync.Run(db, tx, initialCycle) if err != nil { return err } @@ -349,7 +349,7 @@ func MiningStep(ctx context.Context, kv kv.RwDB, mining *stagedsync.Sync, tmpDir miningBatch := membatchwithdb.NewMemoryBatch(tx, tmpDir) defer miningBatch.Rollback() - if err = mining.Run(nil, miningBatch, false /* firstCycle */); err != nil { + if _, err = mining.Run(nil, miningBatch, false /* firstCycle */); err != nil { return err } tx.Rollback() @@ -477,19 +477,36 @@ func NewDefaultStages(ctx context.Context, // Hence we run it in the test mode. 
runInTestMode := cfg.ImportMode - var loopBreakCheck func() bool + var loopBreakCheck func(int) bool if heimdallClient != nil && flags.Milestone { - loopBreakCheck = heimdall.MilestoneRewindPending + loopBreakCheck = func(int) bool { + return heimdall.MilestoneRewindPending() + } + } + + if cfg.Sync.LoopBlockLimit > 0 { + previousBreakCheck := loopBreakCheck + loopBreakCheck = func(loopCount int) bool { + if loopCount > int(cfg.Sync.LoopBlockLimit) { + return true + } + + if previousBreakCheck != nil { + return previousBreakCheck(loopCount) + } + + return false + } } return stagedsync.DefaultStages(ctx, - stagedsync.StageSnapshotsCfg(db, *controlServer.ChainConfig, dirs, blockRetire, snapDownloader, blockReader, notifications.Events, cfg.HistoryV3, agg, cfg.InternalCL && cfg.CaplinConfig.Backfilling, silkworm), - stagedsync.StageHeadersCfg(db, controlServer.Hd, controlServer.Bd, *controlServer.ChainConfig, controlServer.SendHeaderRequest, controlServer.PropagateNewBlockHashes, controlServer.Penalize, cfg.BatchSize, p2pCfg.NoDiscovery, blockReader, blockWriter, dirs.Tmp, notifications, forkValidator, loopBreakCheck), - stagedsync.StageBorHeimdallCfg(db, snapDb, stagedsync.MiningState{}, *controlServer.ChainConfig, heimdallClient, blockReader, controlServer.Hd, controlServer.Penalize, recents, signatures), + stagedsync.StageSnapshotsCfg(db, *controlServer.ChainConfig, cfg.Sync, dirs, blockRetire, snapDownloader, blockReader, notifications, cfg.HistoryV3, agg, cfg.InternalCL && cfg.CaplinConfig.Backfilling, silkworm), + stagedsync.StageHeadersCfg(db, controlServer.Hd, controlServer.Bd, *controlServer.ChainConfig, cfg.Sync, controlServer.SendHeaderRequest, controlServer.PropagateNewBlockHashes, controlServer.Penalize, cfg.BatchSize, p2pCfg.NoDiscovery, blockReader, blockWriter, dirs.Tmp, notifications, forkValidator, loopBreakCheck), + stagedsync.StageBorHeimdallCfg(db, snapDb, stagedsync.MiningState{}, *controlServer.ChainConfig, heimdallClient, blockReader, controlServer.Hd, controlServer.Penalize, loopBreakCheck, recents, signatures), stagedsync.StageBlockHashesCfg(db, dirs.Tmp, controlServer.ChainConfig, blockWriter), - stagedsync.StageBodiesCfg(db, controlServer.Bd, controlServer.SendBodyRequest, controlServer.Penalize, controlServer.BroadcastNewBlock, cfg.Sync.BodyDownloadTimeoutSeconds, *controlServer.ChainConfig, blockReader, cfg.HistoryV3, blockWriter), - stagedsync.StageSendersCfg(db, controlServer.ChainConfig, false, dirs.Tmp, cfg.Prune, blockReader, controlServer.Hd), + stagedsync.StageBodiesCfg(db, controlServer.Bd, controlServer.SendBodyRequest, controlServer.Penalize, controlServer.BroadcastNewBlock, cfg.Sync.BodyDownloadTimeoutSeconds, *controlServer.ChainConfig, blockReader, cfg.HistoryV3, blockWriter, loopBreakCheck), + stagedsync.StageSendersCfg(db, controlServer.ChainConfig, false, dirs.Tmp, cfg.Prune, blockReader, controlServer.Hd, loopBreakCheck), stagedsync.StageExecuteBlocksCfg( db, cfg.Prune, @@ -523,6 +540,7 @@ func NewDefaultStages(ctx context.Context, func NewPipelineStages(ctx context.Context, db kv.RwDB, cfg *ethconfig.Config, + p2pCfg p2p.Config, controlServer *sentry_multi_client.MultiClient, notifications *shards.Notifications, snapDownloader proto_downloader.DownloaderClient, @@ -541,10 +559,64 @@ func NewPipelineStages(ctx context.Context, // Hence we run it in the test mode. 
runInTestMode := cfg.ImportMode - return stagedsync.PipelineStages(ctx, - stagedsync.StageSnapshotsCfg(db, *controlServer.ChainConfig, dirs, blockRetire, snapDownloader, blockReader, notifications.Events, cfg.HistoryV3, agg, cfg.InternalCL && cfg.CaplinConfig.Backfilling, silkworm), + var loopBreakCheck func(int) bool + + if cfg.Sync.LoopBlockLimit > 0 { + previousBreakCheck := loopBreakCheck + loopBreakCheck = func(loopCount int) bool { + if loopCount > int(cfg.Sync.LoopBlockLimit) { + return true + } + + if previousBreakCheck != nil { + return previousBreakCheck(loopCount) + } + + return false + } + } + + if len(cfg.Sync.UploadLocation) == 0 { + return stagedsync.PipelineStages(ctx, + stagedsync.StageSnapshotsCfg(db, *controlServer.ChainConfig, cfg.Sync, dirs, blockRetire, snapDownloader, blockReader, notifications, cfg.HistoryV3, agg, cfg.InternalCL && cfg.CaplinConfig.Backfilling, silkworm), + stagedsync.StageBlockHashesCfg(db, dirs.Tmp, controlServer.ChainConfig, blockWriter), + stagedsync.StageSendersCfg(db, controlServer.ChainConfig, false, dirs.Tmp, cfg.Prune, blockReader, controlServer.Hd, loopBreakCheck), + stagedsync.StageExecuteBlocksCfg( + db, + cfg.Prune, + cfg.BatchSize, + nil, + controlServer.ChainConfig, + controlServer.Engine, + &vm.Config{}, + notifications.Accumulator, + cfg.StateStream, + /*stateStream=*/ false, + cfg.HistoryV3, + dirs, + blockReader, + controlServer.Hd, + cfg.Genesis, + cfg.Sync, + agg, + silkwormForExecutionStage(silkworm, cfg), + ), + stagedsync.StageHashStateCfg(db, dirs, cfg.HistoryV3), + stagedsync.StageTrieCfg(db, checkStateRoot, true, false, dirs.Tmp, blockReader, controlServer.Hd, cfg.HistoryV3, agg), + stagedsync.StageHistoryCfg(db, cfg.Prune, dirs.Tmp), + stagedsync.StageLogIndexCfg(db, cfg.Prune, dirs.Tmp), + stagedsync.StageCallTracesCfg(db, cfg.Prune, 0, dirs.Tmp), + stagedsync.StageTxLookupCfg(db, cfg.Prune, dirs.Tmp, controlServer.ChainConfig.Bor, blockReader), + stagedsync.StageFinishCfg(db, dirs.Tmp, forkValidator), + runInTestMode) + } + + return stagedsync.UploaderPipelineStages(ctx, + stagedsync.StageSnapshotsCfg(db, *controlServer.ChainConfig, cfg.Sync, dirs, blockRetire, snapDownloader, blockReader, notifications, cfg.HistoryV3, agg, cfg.InternalCL && cfg.CaplinConfig.Backfilling, silkworm), + stagedsync.StageHeadersCfg(db, controlServer.Hd, controlServer.Bd, *controlServer.ChainConfig, cfg.Sync, controlServer.SendHeaderRequest, controlServer.PropagateNewBlockHashes, controlServer.Penalize, cfg.BatchSize, p2pCfg.NoDiscovery, blockReader, blockWriter, dirs.Tmp, notifications, forkValidator, loopBreakCheck), stagedsync.StageBlockHashesCfg(db, dirs.Tmp, controlServer.ChainConfig, blockWriter), - stagedsync.StageSendersCfg(db, controlServer.ChainConfig, false, dirs.Tmp, cfg.Prune, blockReader, controlServer.Hd), + stagedsync.StageSendersCfg(db, controlServer.ChainConfig, false, dirs.Tmp, cfg.Prune, blockReader, controlServer.Hd, loopBreakCheck), + stagedsync.StageBodiesCfg(db, controlServer.Bd, controlServer.SendBodyRequest, controlServer.Penalize, controlServer.BroadcastNewBlock, cfg.Sync.BodyDownloadTimeoutSeconds, *controlServer.ChainConfig, blockReader, cfg.HistoryV3, blockWriter, loopBreakCheck), stagedsync.StageExecuteBlocksCfg( db, cfg.Prune, @@ -573,17 +645,19 @@ func NewPipelineStages(ctx context.Context, stagedsync.StageTxLookupCfg(db, cfg.Prune, dirs.Tmp, controlServer.ChainConfig.Bor, blockReader), stagedsync.StageFinishCfg(db, dirs.Tmp, forkValidator), runInTestMode) + } func NewInMemoryExecution(ctx context.Context, db 
kv.RwDB, cfg *ethconfig.Config, controlServer *sentry_multi_client.MultiClient, dirs datadir.Dirs, notifications *shards.Notifications, blockReader services.FullBlockReader, blockWriter *blockio.BlockWriter, agg *state.AggregatorV3, silkworm *silkworm.Silkworm, logger log.Logger) *stagedsync.Sync { return stagedsync.New( + cfg.Sync, stagedsync.StateStages(ctx, - stagedsync.StageHeadersCfg(db, controlServer.Hd, controlServer.Bd, *controlServer.ChainConfig, controlServer.SendHeaderRequest, controlServer.PropagateNewBlockHashes, controlServer.Penalize, cfg.BatchSize, false, blockReader, blockWriter, dirs.Tmp, nil, nil, nil), - stagedsync.StageBodiesCfg(db, controlServer.Bd, controlServer.SendBodyRequest, controlServer.Penalize, controlServer.BroadcastNewBlock, cfg.Sync.BodyDownloadTimeoutSeconds, *controlServer.ChainConfig, blockReader, cfg.HistoryV3, blockWriter), + stagedsync.StageHeadersCfg(db, controlServer.Hd, controlServer.Bd, *controlServer.ChainConfig, cfg.Sync, controlServer.SendHeaderRequest, controlServer.PropagateNewBlockHashes, controlServer.Penalize, cfg.BatchSize, false, blockReader, blockWriter, dirs.Tmp, nil, nil, nil), + stagedsync.StageBodiesCfg(db, controlServer.Bd, controlServer.SendBodyRequest, controlServer.Penalize, controlServer.BroadcastNewBlock, cfg.Sync.BodyDownloadTimeoutSeconds, *controlServer.ChainConfig, blockReader, cfg.HistoryV3, blockWriter, nil), stagedsync.StageBlockHashesCfg(db, dirs.Tmp, controlServer.ChainConfig, blockWriter), - stagedsync.StageSendersCfg(db, controlServer.ChainConfig, true, dirs.Tmp, cfg.Prune, blockReader, controlServer.Hd), + stagedsync.StageSendersCfg(db, controlServer.ChainConfig, true, dirs.Tmp, cfg.Prune, blockReader, controlServer.Hd, nil), stagedsync.StageExecuteBlocksCfg( db, cfg.Prune,