From 22a9c6d95fe993ab36a3df554b5a7c7361f48993 Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Fri, 5 Jan 2024 11:25:53 -0800 Subject: [PATCH] feat(cli): add shard CLI command (#1785) * stub out shard CLI command * prune blocks before and after desired range * update rollback to use patched cometbft * temp override for local patched versions * handle pruning cometbft & block store state * include docs & support -1 for "latest" * update changelog * add --only-app-state flag to match cosmos-sdk prune cmd * give -1 magic number a name & reuse home from ctx * refactor to only open state.db & blockstore.db once * write rollback progress to one line * prevent attempting rollback of future blocks * make shard inclusive of endblock * use tagged cosmo-sdk & cometbft versions --- CHANGELOG.md | 2 + cmd/kava/cmd/root.go | 1 + cmd/kava/cmd/shard.go | 232 ++++++++++++++++++++++++++++++++++++++++++ go.mod | 2 +- 4 files changed, 236 insertions(+), 1 deletion(-) create mode 100644 cmd/kava/cmd/shard.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 751a707054..6dd3b1fd84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ Ref: https://keepachangelog.com/en/1.0.0/ - (metrics) [#1668] Adds non-state breaking x/metrics module for custom telemetry. - (metrics) [#1669] Add performance timing metrics to all Begin/EndBlockers - (rocksdb) [#1767] Fix resolution of rocksdb database path +- (cli) [#1785] Add `shard` CLI command to support creating partitions of data for standalone nodes ## [v0.23.2] @@ -256,6 +257,7 @@ the [changelog](https://github.com/cosmos/cosmos-sdk/blob/v0.38.4/CHANGELOG.md). 
- [#257](https://github.com/Kava-Labs/kava/pulls/257) Include scripts to run large-scale simulations remotely using aws-batch +[#1785]: https://github.com/Kava-Labs/kava/pull/1785 [#1767]: https://github.com/Kava-Labs/kava/pull/1767 [#1669]: https://github.com/Kava-Labs/kava/pull/1669 [#1668]: https://github.com/Kava-Labs/kava/pull/1668 diff --git a/cmd/kava/cmd/root.go b/cmd/kava/cmd/root.go index 0a8cc71d0b..0894bbbd9c 100644 --- a/cmd/kava/cmd/root.go +++ b/cmd/kava/cmd/root.go @@ -126,5 +126,6 @@ func addSubCmds(rootCmd *cobra.Command, encodingConfig params.EncodingConfig, de newQueryCmd(), newTxCmd(), kavaclient.KeyCommands(app.DefaultNodeHome), + newShardCmd(opts), ) } diff --git a/cmd/kava/cmd/shard.go b/cmd/kava/cmd/shard.go new file mode 100644 index 0000000000..c43ad7ddc1 --- /dev/null +++ b/cmd/kava/cmd/shard.go @@ -0,0 +1,232 @@ +package cmd + +import ( + "fmt" + "strings" + + "github.com/spf13/cobra" + + dbm "github.com/cometbft/cometbft-db" + + "github.com/cosmos/cosmos-sdk/client" + "github.com/cosmos/cosmos-sdk/client/flags" + pruningtypes "github.com/cosmos/cosmos-sdk/pruning/types" + "github.com/cosmos/cosmos-sdk/server" + "github.com/cosmos/cosmos-sdk/store/rootmulti" + + tmconfig "github.com/tendermint/tendermint/config" + "github.com/tendermint/tendermint/node" + tmstate "github.com/tendermint/tendermint/state" + "github.com/tendermint/tendermint/store" + + ethermintserver "github.com/evmos/ethermint/server" +) + +const ( + flagShardStartBlock = "start" + flagShardEndBlock = "end" + flagShardOnlyAppState = "only-app-state" + // TODO: --preserve flag for creating & operating on a copy? 
+
+	// allow using -1 to mean "latest" (perform no rollbacks)
+	shardEndBlockLatest = -1
+)
+
+// newShardCmd builds the `shard` CLI command, which trims a node's local
+// databases down to the inclusive block range [--start, --end].
+//
+// The command first rolls the application store, block store and cometbft
+// state back to the end block, then prunes every height below the start block.
+// opts supplies the DB opener, the app creator and the default home directory.
+func newShardCmd(opts ethermintserver.StartOptions) *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "shard --home --start --end [--only-app-state]",
+		Short: "Strip all blocks from the database outside of a given range",
+		Long: `shard opens a local kava home directory's databases and removes all blocks outside a range defined by --start and --end. The range is inclusive of the end block.
+
+It works by first rolling back the latest state to the end block, and then by pruning all state before the start block.
+
+Setting the end block to -1 signals to keep the latest block (no rollbacks).
+
+The --only-app-state flag can be used to skip the pruning of the blockstore and cometbft state. This matches the functionality of the cosmos-sdk's "prune" command. Note that rolled back blocks will still affect all stores.
+
+WARNING: this is a destructive action.`,
+		Example: `Create a 1M block data shard (keeps blocks kava 1,000,000 to 2,000,000)
+$ kava shard --home path/to/.kava --start 1000000 --end 2000000
+
+Prune all blocks up to 5,000,000:
+$ kava shard --home path/to/.kava --start 5000000 --end -1
+
+Prune first 1M blocks _without_ affecting blockstore or cometBFT state:
+$ kava shard --home path/to/.kava --start 1000000 --end -1 --only-app-state`,
+		RunE: func(cmd *cobra.Command, args []string) error {
+			// read & validate flags
+			startBlock, err := cmd.Flags().GetInt64(flagShardStartBlock)
+			if err != nil {
+				return err
+			}
+			endBlock, err := cmd.Flags().GetInt64(flagShardEndBlock)
+			if err != nil {
+				return err
+			}
+			// the range is inclusive on both ends, so end == start is a valid shard.
+			// 0 is the default of the end flag and is never a valid end block.
+			if endBlock != shardEndBlockLatest && (endBlock == 0 || endBlock < startBlock) {
+				return fmt.Errorf("end block (%d) must be greater than or equal to start block (%d)", endBlock, startBlock)
+			}
+			onlyAppState, err := cmd.Flags().GetBool(flagShardOnlyAppState)
+			if err != nil {
+				return err
+			}
+
+			clientCtx := client.GetClientContextFromCmd(cmd)
+
+			ctx := server.GetServerContextFromCmd(cmd)
+			ctx.Config.SetRoot(clientCtx.HomeDir)
+
+			//////////////////////////////
+			// Rollback state to endBlock
+			//////////////////////////////
+
+			// connect to database
+			db, err := opts.DBOpener(ctx.Viper, clientCtx.HomeDir, server.GetAppDBBackend(ctx.Viper))
+			if err != nil {
+				return err
+			}
+
+			// close db connection when done
+			defer func() {
+				if err := db.Close(); err != nil {
+					ctx.Logger.Error("error closing db", "error", err.Error())
+				}
+			}()
+
+			// get the multistore
+			app := opts.AppCreator(ctx.Logger, db, nil, ctx.Viper)
+			cms := app.CommitMultiStore()
+			multistore, ok := cms.(*rootmulti.Store)
+			if !ok {
+				return fmt.Errorf("only sharding of rootmulti.Store type is supported")
+			}
+
+			// handle desired endblock being latest
+			latest := multistore.LatestVersion()
+			fmt.Printf("latest height: %d\n", latest)
+			if endBlock == shardEndBlockLatest {
+				endBlock = latest
+			}
+
+			// error if requesting block range the database does not have
+			if endBlock > latest {
+				return fmt.Errorf("data does not contain end block (%d): latest version is %d", endBlock, latest)
+			}
+			// when --end is -1 the range can only be checked once the latest height is known.
+			// without this check a start block past the latest height yields a non-positive
+			// shard size and every stored height would be queued for pruning.
+			if startBlock > endBlock {
+				return fmt.Errorf("start block (%d) is greater than end block (%d)", startBlock, endBlock)
+			}
+			shardSize := endBlock - startBlock + 1
+
+			fmt.Printf("pruning data in %s down to heights %d - %d (%d blocks)\n", clientCtx.HomeDir, startBlock, endBlock, shardSize)
+
+			// set pruning options to prevent no-ops from `PruneStores`
+			multistore.SetPruning(pruningtypes.PruningOptions{KeepRecent: uint64(shardSize), Interval: 0})
+
+			// rollback application state
+			if err = multistore.RollbackToVersion(endBlock); err != nil {
+				return fmt.Errorf("failed to rollback application state: %w", err)
+			}
+
+			// open block store & cometbft state
+			// NOTE(review): the databases behind these stores are not closed before
+			// returning - confirm whether the store types expose Close and defer it here.
+			blockStore, stateStore, err := openCometBftDbs(ctx.Config)
+			if err != nil {
+				return fmt.Errorf("failed to open cometbft dbs: %w", err)
+			}
+
+			// prep for outputting progress repeatedly to same line
+			needsRollback := endBlock < latest
+			progress := "rolling back blockstore & cometbft state to height %d"
+			// the widest message is the one for the largest height, so blank that many columns
+			numChars := len(fmt.Sprintf(progress, latest))
+			clearLine := fmt.Sprintf("\r%s\r", strings.Repeat(" ", numChars))
+			printRollbackProgress := func(h int64) {
+				fmt.Print(clearLine)
+				fmt.Printf(progress, h)
+			}
+
+			// rollback tendermint db, one block per iteration
+			height := latest
+			for height > endBlock {
+				printRollbackProgress(height - 1)
+				height, _, err = tmstate.Rollback(blockStore, stateStore, true)
+				if err != nil {
+					return fmt.Errorf("failed to rollback tendermint state: %w", err)
+				}
+			}
+
+			if needsRollback {
+				// terminate the progress line
+				fmt.Println()
+			} else {
+				fmt.Printf("latest store height is already %d\n", latest)
+			}
+
+			//////////////////////////////
+			// Prune blocks to startBlock
+			//////////////////////////////
+
+			// enumerate all heights to prune: 1 .. startBlock-1.
+			// the slice is only allocated when there is something to prune so that a
+			// start block below 1 can never produce a negative capacity (which panics).
+			var pruneHeights []int64
+			if startBlock > 1 {
+				pruneHeights = make([]int64, 0, startBlock-1)
+				for h := int64(1); h < startBlock; h++ {
+					pruneHeights = append(pruneHeights, h)
+				}
+			}
+
+			if len(pruneHeights) > 0 {
+				// prune application state
+				fmt.Printf("pruning application state to height %d\n", startBlock)
+				if err := multistore.PruneStores(true, pruneHeights); err != nil {
+					return fmt.Errorf("failed to prune application state: %w", err)
+				}
+			}
+
+			// get starting block of block store
+			baseBlock := blockStore.Base()
+
+			// only prune if data exists, otherwise blockStore.PruneBlocks will panic
+			if !onlyAppState && baseBlock < startBlock {
+				// prune block store
+				fmt.Printf("pruning block store from %d - %d\n", baseBlock, startBlock)
+				if _, err := blockStore.PruneBlocks(startBlock); err != nil {
+					return fmt.Errorf("failed to prune block store (retainHeight=%d): %w", startBlock, err)
+				}
+
+				// prune cometbft state
+				fmt.Printf("pruning cometbft state from %d - %d\n", baseBlock, startBlock)
+				if err := stateStore.PruneStates(baseBlock, startBlock); err != nil {
+					return fmt.Errorf("failed to prune cometbft state store (%d - %d): %w", baseBlock, startBlock, err)
+				}
+			} else {
+				fmt.Printf("blockstore and cometbft state begins at block %d\n", baseBlock)
+			}
+
+			// TODO: db compaction
+
+			return nil
+		},
+	}
+
+	cmd.Flags().String(flags.FlagHome, opts.DefaultNodeHome, "The application home directory")
+	cmd.Flags().Int64(flagShardStartBlock, 1, "Start block of data shard (inclusive)")
+	cmd.Flags().Int64(flagShardEndBlock, 0, "End block of data shard (inclusive)")
+	cmd.Flags().Bool(flagShardOnlyAppState, false, "Skip pruning of blockstore & cometbft state")
+
+	return cmd
+}
+
+// openCometBftDbs opens the "blockstore" and "state" databases described by
+// config and wraps them in a block store and a state store.
+//
+// On error both returned stores are nil and no database opened by this
+// function is left open.
+//
+// inspired by https://github.com/Kava-Labs/cometbft/blob/277b0853db3f67865a55aa1c54f59790b5f591be/node/node.go#L234
+func openCometBftDbs(config *tmconfig.Config) (blockStore *store.BlockStore, stateStore tmstate.Store, err error) {
+	dbProvider := node.DefaultDBProvider
+
+	var blockStoreDB dbm.DB
+	blockStoreDB, err = dbProvider(&node.DBContext{ID: "blockstore", Config: config})
+	if err != nil {
+		return nil, nil, err
+	}
+
+	stateDB, err := dbProvider(&node.DBContext{ID: "state", Config: config})
+	if err != nil {
+		// don't leak the blockstore handle; the open error is the one worth reporting,
+		// so a secondary close error is intentionally dropped.
+		_ = blockStoreDB.Close()
+		return nil, nil, err
+	}
+
+	blockStore = store.NewBlockStore(blockStoreDB)
+	stateStore = tmstate.NewStore(stateDB, tmstate.StoreOptions{
+		DiscardABCIResponses: config.Storage.DiscardABCIResponses,
+	})
+
+	return blockStore, stateStore, nil
+}
diff --git a/go.mod b/go.mod
index dc55eaa35f..49a283bfd8 100644
--- a/go.mod
+++ b/go.mod
@@ -6,6 +6,7 @@ require (
 	cosmossdk.io/errors v1.0.0-beta.7
 	cosmossdk.io/math v1.0.0-rc.0
 	github.com/cenkalti/backoff/v4 v4.1.3
+	github.com/cometbft/cometbft-db v0.9.1
 	github.com/cosmos/cosmos-proto v1.0.0-beta.3
 	github.com/cosmos/cosmos-sdk v0.46.16
 	github.com/cosmos/go-bip39 v1.0.0
@@ -70,7 +71,6 @@ require (
 	github.com/cockroachdb/redact v1.0.8 // indirect
 	github.com/cockroachdb/sentry-go v0.6.1-cockroachdb.2 // indirect
 	github.com/coinbase/rosetta-sdk-go v0.7.9 // indirect
-	github.com/cometbft/cometbft-db v0.9.1 // indirect
 	github.com/confio/ics23/go v0.9.0 // indirect
 	github.com/cosmos/btcutil v1.0.5 // indirect
 	github.com/cosmos/cosmos-db v1.0.0 // indirect