Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable local statesync/snapshot restore #13521

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion baseapp/baseapp.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ type BaseApp struct { //nolint: maligned
fauxMerkleMode bool // if true, IAVL MountStores uses MountStoresDB for simulation speed.

// manages snapshots, i.e. dumps of app state at certain intervals
snapshotManager *snapshots.Manager
snapshotManager *snapshots.Manager
snapshotRestoreHeight uint64 // local snapshot height to restore

// volatile states:
//
Expand Down
72 changes: 72 additions & 0 deletions server/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package server
// DONTCOVER

import (
"errors"
"fmt"
"net"
"net/http"
Expand Down Expand Up @@ -33,6 +34,7 @@ import (
"github.com/cosmos/cosmos-sdk/server/types"
"github.com/cosmos/cosmos-sdk/telemetry"
sdktypes "github.com/cosmos/cosmos-sdk/types"
abci "github.com/tendermint/tendermint/abci/types"
)

const (
Expand Down Expand Up @@ -61,6 +63,7 @@ const (
// state sync-related flags
FlagStateSyncSnapshotInterval = "state-sync.snapshot-interval"
FlagStateSyncSnapshotKeepRecent = "state-sync.snapshot-keep-recent"
FlagStateSyncRestoreHeight = "state-sync.local-restore-height"

// api-related flags
FlagAPIEnable = "api.enable"
Expand Down Expand Up @@ -186,6 +189,7 @@ is performed. Note, when enabled, gRPC will also be automatically enabled.

cmd.Flags().Uint64(FlagStateSyncSnapshotInterval, 0, "State sync snapshot interval")
cmd.Flags().Uint32(FlagStateSyncSnapshotKeepRecent, 2, "State sync snapshot to keep")
cmd.Flags().Uint64(FlagStateSyncRestoreHeight, 0, "Height of local State Sync backup to lazy restore (restore based on local snapshots)")

// add support for all Tendermint-specific command line options
tcmd.AddNodeFlags(cmd)
Expand Down Expand Up @@ -244,6 +248,63 @@ func startStandAlone(ctx *Context, appCreator types.AppCreator) error {
return WaitForQuitSignals()
}

// lazyLoadLocalSnapshot attempts to restore one of the operator's local State Sync
// snapshots based on the provided State Sync snapshot height, ssRestoreHeight. This
// should be executed when an operator already has State Sync enabled and does not want
// to directly rely on the State Sync mechanism via the P2P network. Instead, we load state
// based on the existing local snapshots. An error is returned if the snapshot does not exist
// or fails to load.
func lazyLoadLocalSnapshot(ctx *Context, app types.Application, ssRestoreHeight uint64) error {
ctx.Logger.Info("Local State Sync restore snapshot", "height", ssRestoreHeight)

// StateSync.RestoreHeight triggers tendermint to restore store and block state
ctx.Config.StateSync.RestoreHeight = ssRestoreHeight

ctx.Logger.Info("Searching local snapshots")
resp := app.ListSnapshots(abci.RequestListSnapshots{})
if len(resp.Snapshots) < 1 {
ctx.Logger.Error("No available snapshots")
return errors.New("No available snapshots")
}
var snapshot *abci.Snapshot
chillyvee marked this conversation as resolved.
Show resolved Hide resolved
for i, s := range resp.Snapshots {
ctx.Logger.Info("Found local State Sync snapshot", "snapshot", i, "height", s.Height)
if ssRestoreHeight == s.Height {
ctx.Logger.Info("Restoring selected local State Sync snapshot", "snapshot", i, "height", s.Height)
snapshot = s
}
}
if snapshot == nil {
ctx.Logger.Error("Could not find local snapshot", "height", ssRestoreHeight)
return errors.New("Could not find local snapshot")
}

app.OfferSnapshot(abci.RequestOfferSnapshot{Snapshot: snapshot})

for index := uint32(0); index < snapshot.Chunks; index++ {
ctx.Logger.Info(fmt.Sprintf("Loading Chunk %d", index))
respChunk := app.LoadSnapshotChunk(abci.RequestLoadSnapshotChunk{
Height: snapshot.Height,
Format: snapshot.Format,
Chunk: index,
})
ctx.Logger.Info(fmt.Sprintf("Apply Chunk %d", index))
applyRes := app.ApplySnapshotChunk(abci.RequestApplySnapshotChunk{
Index: index,
Chunk: respChunk.Chunk,
})
if applyRes.Result != abci.ResponseApplySnapshotChunk_ACCEPT {
ctx.Logger.Error("Local State Sync snapshot chunk apply fail", "snapshot", index, "reason", applyRes)
return errors.New("Snapshot chunk apply fail")
} else {
ctx.Logger.Info("Local State Sync snapshot chunk apply ok", "snapshot", index)
}

}
ctx.Logger.Info(fmt.Sprintf("Local Statesync Restore Complete"))
return nil
}

func startInProcess(ctx *Context, clientCtx client.Context, appCreator types.AppCreator) error {
cfg := ctx.Config
home := cfg.RootDir
Expand Down Expand Up @@ -316,6 +377,17 @@ func startInProcess(ctx *Context, clientCtx client.Context, appCreator types.App
ctx.Logger.Info("starting node in gRPC only mode; Tendermint is disabled")
config.GRPC.Enable = true
} else {
ssRestoreHeight := ctx.Viper.GetUint64(FlagStateSyncRestoreHeight)
if ssRestoreHeight == 0 {
// no restore height specified
ctx.Logger.Debug("**** No local snapshot restore height specified")
} else {
err := lazyLoadLocalSnapshot(ctx, app, ssRestoreHeight)
if err != nil {
return err
}
}

ctx.Logger.Info("starting node with ABCI Tendermint in-process")

tmNode, err = node.NewNode(
Expand Down