diff --git a/etcdutl/etcdutl/snapshot_command.go b/etcdutl/etcdutl/snapshot_command.go index 9e557495e5c0..eeffb93bbd84 100644 --- a/etcdutl/etcdutl/snapshot_command.go +++ b/etcdutl/etcdutl/snapshot_command.go @@ -22,6 +22,7 @@ import ( "go.etcd.io/etcd/etcdutl/v3/snapshot" "go.etcd.io/etcd/pkg/v3/cobrautl" + "go.etcd.io/etcd/server/v3/storage/backend" "go.etcd.io/etcd/server/v3/storage/datadir" ) @@ -38,6 +39,7 @@ var ( restorePeerURLs string restoreName string skipHashCheck bool + initialMmapSize = backend.InitialMmapSize markCompacted bool revisionBump uint64 ) @@ -77,6 +79,7 @@ func NewSnapshotRestoreCommand() *cobra.Command { cmd.Flags().StringVar(&restorePeerURLs, "initial-advertise-peer-urls", defaultInitialAdvertisePeerURLs, "List of this member's peer URLs to advertise to the rest of the cluster") cmd.Flags().StringVar(&restoreName, "name", defaultName, "Human-readable name for this member") cmd.Flags().BoolVar(&skipHashCheck, "skip-hash-check", false, "Ignore snapshot integrity hash value (required if copied from data directory)") + cmd.Flags().Uint64Var(&initialMmapSize, "initial-memory-map-size", initialMmapSize, "Initial memory map size of the database in bytes. It uses the default value if not defined or defined to 0") cmd.Flags().Uint64Var(&revisionBump, "bump-revision", 0, "How much to increase the latest revision after restore") cmd.Flags().BoolVar(&markCompacted, "mark-compacted", false, "Mark the latest revision after restore as the point of scheduled compaction (required if --bump-revision > 0, disallowed otherwise)") @@ -104,7 +107,7 @@ func SnapshotStatusCommandFunc(cmd *cobra.Command, args []string) { func snapshotRestoreCommandFunc(_ *cobra.Command, args []string) { SnapshotRestoreCommandFunc(restoreCluster, restoreClusterToken, restoreDataDir, restoreWalDir, - restorePeerURLs, restoreName, skipHashCheck, revisionBump, markCompacted, args) + restorePeerURLs, restoreName, skipHashCheck, initialMmapSize, revisionBump, markCompacted, args) } func SnapshotRestoreCommandFunc(restoreCluster string, @@ -114,6 +117,7 @@ func SnapshotRestoreCommandFunc(restoreCluster string, restorePeerURLs string, restoreName string, skipHashCheck bool, + initialMmapSize uint64, revisionBump uint64, markCompacted bool, args []string) { @@ -149,6 +153,7 @@ func SnapshotRestoreCommandFunc(restoreCluster string, InitialCluster: restoreCluster, InitialClusterToken: restoreClusterToken, SkipHashCheck: skipHashCheck, + InitialMmapSize: initialMmapSize, RevisionBump: revisionBump, MarkCompacted: markCompacted, }); err != nil { diff --git a/etcdutl/snapshot/v3_snapshot.go b/etcdutl/snapshot/v3_snapshot.go index 64d16acd75c8..f8aa90c1f9a4 100644 --- a/etcdutl/snapshot/v3_snapshot.go +++ b/etcdutl/snapshot/v3_snapshot.go @@ -83,7 +83,8 @@ type v3Manager struct { snapDir string cl *membership.RaftCluster - skipHashCheck bool + skipHashCheck bool + initialMmapSize uint64 } // hasChecksum returns "true" if the file size "n" @@ -204,6 +205,9 @@ type RestoreConfig struct { // (required if copied from data directory). SkipHashCheck bool + // InitialMmapSize is the database initial memory map size. + InitialMmapSize uint64 + // RevisionBump is the amount to increase the latest revision after restore, // to allow administrators to trick clients into thinking that revision never decreased. // If 0, revision bumping is skipped. @@ -233,6 +237,7 @@ func (s *v3Manager) Restore(cfg RestoreConfig) error { PeerURLs: pURLs, InitialPeerURLsMap: ics, InitialClusterToken: cfg.InitialClusterToken, + InitialMmapSize: cfg.InitialMmapSize, } if err = srv.VerifyBootstrap(); err != nil { return err @@ -263,6 +268,7 @@ func (s *v3Manager) Restore(cfg RestoreConfig) error { s.walDir = walDir s.snapDir = filepath.Join(dataDir, "member", "snap") s.skipHashCheck = cfg.SkipHashCheck + s.initialMmapSize = cfg.InitialMmapSize s.lg.Info( "restoring snapshot", @@ -270,6 +276,7 @@ func (s *v3Manager) Restore(cfg RestoreConfig) error { zap.String("wal-dir", s.walDir), zap.String("data-dir", dataDir), zap.String("snap-dir", s.snapDir), + zap.Uint64("initial-memory-map-size", s.initialMmapSize), ) if err = s.saveDB(); err != nil { @@ -297,6 +304,7 @@ func (s *v3Manager) Restore(cfg RestoreConfig) error { zap.String("wal-dir", s.walDir), zap.String("data-dir", dataDir), zap.String("snap-dir", s.snapDir), + zap.Uint64("initial-memory-map-size", s.initialMmapSize), ) return verify.VerifyIfEnabled(verify.Config{ @@ -317,7 +325,7 @@ func (s *v3Manager) saveDB() error { return err } - be := backend.NewDefaultBackend(s.lg, s.outDbPath()) + be := backend.NewDefaultBackend(s.lg, s.outDbPath(), backend.WithMmapSize(s.initialMmapSize)) defer be.Close() err = schema.NewMembershipBackend(s.lg, be).TrimMembershipFromBackend() @@ -472,7 +480,7 @@ func (s *v3Manager) saveWALAndSnap() (*raftpb.HardState, error) { } // add members again to persist them to the backend we create. - be := backend.NewDefaultBackend(s.lg, s.outDbPath()) + be := backend.NewDefaultBackend(s.lg, s.outDbPath(), backend.WithMmapSize(s.initialMmapSize)) defer be.Close() s.cl.SetBackend(schema.NewMembershipBackend(s.lg, be)) for _, m := range s.cl.Members() { @@ -551,7 +559,7 @@ func (s *v3Manager) saveWALAndSnap() (*raftpb.HardState, error) { } func (s *v3Manager) updateCIndex(commit uint64, term uint64) error { - be := backend.NewDefaultBackend(s.lg, s.outDbPath()) + be := backend.NewDefaultBackend(s.lg, s.outDbPath(), backend.WithMmapSize(s.initialMmapSize)) defer be.Close() cindex.UpdateConsistentIndexForce(be.BatchTx(), commit, term) diff --git a/server/storage/backend/backend.go b/server/storage/backend/backend.go index 4b2a49cecfad..28bafe4edab2 100644 --- a/server/storage/backend/backend.go +++ b/server/storage/backend/backend.go @@ -36,10 +36,10 @@ var ( defragLimit = 10000 - // initialMmapSize is the initial size of the mmapped region. Setting this larger than + // InitialMmapSize is the initial size of the mmapped region. Setting this larger than // the potential max db size can prevent writer from blocking reader. // This only works for linux. - initialMmapSize = uint64(10 * 1024 * 1024 * 1024) + InitialMmapSize = uint64(10 * 1024 * 1024 * 1024) // minSnapshotWarningTimeout is the minimum threshold to trigger a long running snapshot warning. minSnapshotWarningTimeout = 30 * time.Second @@ -157,7 +157,7 @@ func DefaultBackendConfig(lg *zap.Logger) BackendConfig { return BackendConfig{ BatchInterval: defaultBatchInterval, BatchLimit: defaultBatchLimit, - MmapSize: initialMmapSize, + MmapSize: InitialMmapSize, Logger: lg, } }