From 63a1cc3fe40bace6898289dec35a9aad05163889 Mon Sep 17 00:00:00 2001 From: Sam Batschelet Date: Thu, 30 Sep 2021 12:44:39 -0400 Subject: [PATCH] add --experimental-max-learner flag Signed-off-by: Sam Batschelet --- server/config/config.go | 3 + server/embed/config.go | 4 + server/embed/etcd.go | 4 +- server/etcdmain/config.go | 2 + server/etcdmain/help.go | 4 +- server/etcdserver/api/membership/cluster.go | 74 +++++---- .../etcdserver/api/membership/cluster_opts.go | 43 ++++++ .../etcdserver/api/membership/cluster_test.go | 35 ++++- .../api/membership/membership_test.go | 4 +- server/etcdserver/bootstrap.go | 23 ++- server/etcdserver/bootstrap_test.go | 140 ++++++++++++++++++ tests/framework/integration/cluster.go | 9 +- tests/integration/clientv3/cluster_test.go | 33 +++-- 13 files changed, 330 insertions(+), 48 deletions(-) create mode 100644 server/etcdserver/api/membership/cluster_opts.go create mode 100644 server/etcdserver/bootstrap_test.go diff --git a/server/config/config.go b/server/config/config.go index c9e7d3aa3f0..74587efd6a0 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -185,6 +185,9 @@ type ServerConfig struct { // consider running defrag during bootstrap. Needs to be set to non-zero value to take effect. ExperimentalBootstrapDefragThresholdMegabytes uint `json:"experimental-bootstrap-defrag-threshold-megabytes"` + // ExperimentalMaxLearners sets a limit to the number of learner members that can exist in the cluster membership. + ExperimentalMaxLearners int `json:"experimental-max-learners"` + // V2Deprecation defines a phase of v2store deprecation process. 
V2Deprecation V2DeprecationEnum `json:"v2-deprecation"` } diff --git a/server/embed/config.go b/server/embed/config.go index abcdead5a8b..ecec546a0e0 100644 --- a/server/embed/config.go +++ b/server/embed/config.go @@ -34,6 +34,7 @@ import ( "go.etcd.io/etcd/pkg/v3/netutil" "go.etcd.io/etcd/server/v3/config" "go.etcd.io/etcd/server/v3/etcdserver" + "go.etcd.io/etcd/server/v3/etcdserver/api/membership" "go.etcd.io/etcd/server/v3/etcdserver/api/v3compactor" bolt "go.etcd.io/bbolt" @@ -329,6 +330,8 @@ type Config struct { // ExperimentalWarningUnaryRequestDuration is the time duration after which a warning is generated if applying // unary request takes more time than this value. ExperimentalWarningUnaryRequestDuration time.Duration `json:"experimental-warning-unary-request-duration"` + // ExperimentalMaxLearners sets a limit to the number of learner members that can exist in the cluster membership. + ExperimentalMaxLearners int `json:"experimental-max-learners"` // ForceNewCluster starts a new cluster even if previously started; unsafe. 
ForceNewCluster bool `json:"force-new-cluster"` @@ -503,6 +506,7 @@ func NewConfig() *Config { ExperimentalDowngradeCheckTime: DefaultDowngradeCheckTime, ExperimentalMemoryMlock: false, ExperimentalTxnModeWriteWithSharedBuffer: true, + ExperimentalMaxLearners: membership.DefaultMaxLearners, V2Deprecation: config.V2_DEPR_DEFAULT, } diff --git a/server/embed/etcd.go b/server/embed/etcd.go index 418199037cd..5970437f0f0 100644 --- a/server/embed/etcd.go +++ b/server/embed/etcd.go @@ -219,7 +219,8 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) { ExperimentalMemoryMlock: cfg.ExperimentalMemoryMlock, ExperimentalTxnModeWriteWithSharedBuffer: cfg.ExperimentalTxnModeWriteWithSharedBuffer, ExperimentalBootstrapDefragThresholdMegabytes: cfg.ExperimentalBootstrapDefragThresholdMegabytes, - V2Deprecation: cfg.V2DeprecationEffective(), + ExperimentalMaxLearners: cfg.ExperimentalMaxLearners, + V2Deprecation: cfg.V2DeprecationEffective(), } if srvcfg.ExperimentalEnableDistributedTracing { @@ -345,6 +346,7 @@ func print(lg *zap.Logger, ec Config, sc config.ServerConfig, memberInitialized zap.String("discovery-url", sc.DiscoveryURL), zap.String("discovery-proxy", sc.DiscoveryProxy), zap.String("downgrade-check-interval", sc.DowngradeCheckTime.String()), + zap.Int("max-learners", sc.ExperimentalMaxLearners), ) } diff --git a/server/etcdmain/config.go b/server/etcdmain/config.go index 2c3f135d88b..52c99eb6b7e 100644 --- a/server/etcdmain/config.go +++ b/server/etcdmain/config.go @@ -28,6 +28,7 @@ import ( "go.etcd.io/etcd/pkg/v3/flags" cconfig "go.etcd.io/etcd/server/v3/config" "go.etcd.io/etcd/server/v3/embed" + "go.etcd.io/etcd/server/v3/etcdserver/api/membership" "go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp" "go.uber.org/zap" @@ -291,6 +292,7 @@ func newConfig() *config { fs.BoolVar(&cfg.ec.ExperimentalMemoryMlock, "experimental-memory-mlock", cfg.ec.ExperimentalMemoryMlock, "Enable to enforce etcd pages (in particular bbolt) to stay in RAM.") 
fs.BoolVar(&cfg.ec.ExperimentalTxnModeWriteWithSharedBuffer, "experimental-txn-mode-write-with-shared-buffer", true, "Enable the write transaction to use a shared buffer in its readonly check operations.") fs.UintVar(&cfg.ec.ExperimentalBootstrapDefragThresholdMegabytes, "experimental-bootstrap-defrag-threshold-megabytes", 0, "Enable the defrag during etcd server bootstrap on condition that it will free at least the provided threshold of disk space. Needs to be set to non-zero value to take effect.") + fs.IntVar(&cfg.ec.ExperimentalMaxLearners, "experimental-max-learners", membership.DefaultMaxLearners, "Sets the maximum number of learners that can be available in the cluster membership.") // unsafe fs.BoolVar(&cfg.ec.UnsafeNoFsync, "unsafe-no-fsync", false, "Disables fsync, unsafe, will cause data loss.") diff --git a/server/etcdmain/help.go b/server/etcdmain/help.go index 1af8293340c..617e66bd7e5 100644 --- a/server/etcdmain/help.go +++ b/server/etcdmain/help.go @@ -245,13 +245,15 @@ Experimental feature: --experimental-watch-progress-notify-interval '10m' Duration of periodical watch progress notification. --experimental-warning-apply-duration '100ms' - Warning is generated if requests take more than this duration. + Warning is generated if requests take more than this duration. --experimental-txn-mode-write-with-shared-buffer 'true' Enable the write transaction to use a shared buffer in its readonly check operations. --experimental-bootstrap-defrag-threshold-megabytes Enable the defrag during etcd server bootstrap on condition that it will free at least the provided threshold of disk space. Needs to be set to non-zero value to take effect. --experimental-warning-unary-request-duration '300ms' Set time duration after which a warning is generated if a unary request takes more than this duration. + --experimental-max-learners '1' + Set the max number of learner members allowed in the cluster membership. 
Unsafe feature: --force-new-cluster 'false' diff --git a/server/etcdserver/api/membership/cluster.go b/server/etcdserver/api/membership/cluster.go index 795556b8835..18ac1a35f51 100644 --- a/server/etcdserver/api/membership/cluster.go +++ b/server/etcdserver/api/membership/cluster.go @@ -40,8 +40,6 @@ import ( "go.uber.org/zap" ) -const maxLearners = 1 - // RaftCluster is a list of Members that belong to the same raft cluster type RaftCluster struct { lg *zap.Logger @@ -60,6 +58,7 @@ type RaftCluster struct { removed map[types.ID]bool downgradeInfo *serverversion.DowngradeInfo + maxLearners int versionChanged *notify.Notifier } @@ -81,8 +80,8 @@ const ( // NewClusterFromURLsMap creates a new raft cluster using provided urls map. Currently, it does not support creating // cluster with raft learner member. -func NewClusterFromURLsMap(lg *zap.Logger, token string, urlsmap types.URLsMap) (*RaftCluster, error) { - c := NewCluster(lg) +func NewClusterFromURLsMap(lg *zap.Logger, token string, urlsmap types.URLsMap, opts ...ClusterOption) (*RaftCluster, error) { + c := NewCluster(lg, opts...) for name, urls := range urlsmap { m := NewMember(name, urls, token, nil) if _, ok := c.members[m.ID]; ok { @@ -97,8 +96,8 @@ func NewClusterFromURLsMap(lg *zap.Logger, token string, urlsmap types.URLsMap) return c, nil } -func NewClusterFromMembers(lg *zap.Logger, id types.ID, membs []*Member) *RaftCluster { - c := NewCluster(lg) +func NewClusterFromMembers(lg *zap.Logger, id types.ID, membs []*Member, opts ...ClusterOption) *RaftCluster { + c := NewCluster(lg, opts...) c.cid = id for _, m := range membs { c.members[m.ID] = m @@ -106,15 +105,18 @@ func NewClusterFromMembers(lg *zap.Logger, id types.ID, membs []*Member) *RaftCl return c } -func NewCluster(lg *zap.Logger) *RaftCluster { +func NewCluster(lg *zap.Logger, opts ...ClusterOption) *RaftCluster { if lg == nil { lg = zap.NewNop() } + clOpts := newClusterOpts(opts...) 
+ return &RaftCluster{ lg: lg, members: make(map[types.ID]*Member), removed: make(map[types.ID]bool), downgradeInfo: &serverversion.DowngradeInfo{Enabled: false}, + maxLearners: clOpts.maxLearners, } } @@ -289,6 +291,7 @@ func (c *RaftCluster) Recover(onSet func(*zap.Logger, *semver.Version)) { zap.String("local-member-id", c.localID.String()), zap.String("recovered-remote-peer-id", m.ID.String()), zap.Strings("recovered-remote-peer-urls", m.PeerURLs), + zap.Bool("recovered-remote-peer-is-learner", m.IsLearner), ) } if c.version != nil { @@ -303,9 +306,9 @@ func (c *RaftCluster) Recover(onSet func(*zap.Logger, *semver.Version)) { // ensures that it is still valid. func (c *RaftCluster) ValidateConfigurationChange(cc raftpb.ConfChange) error { // TODO: this must be switched to backend as well. - members, removed := membersFromStore(c.lg, c.v2store) + membersMap, removedMap := membersFromStore(c.lg, c.v2store) id := types.ID(cc.NodeID) - if removed[id] { + if removedMap[id] { return ErrIDRemoved } switch cc.Type { @@ -316,19 +319,21 @@ func (c *RaftCluster) ValidateConfigurationChange(cc raftpb.ConfChange) error { } if confChangeContext.IsPromote { // promoting a learner member to voting member - if members[id] == nil { + if membersMap[id] == nil { return ErrIDNotFound } - if !members[id].IsLearner { + if !membersMap[id].IsLearner { return ErrMemberNotLearner } } else { // adding a new member - if members[id] != nil { + if membersMap[id] != nil { return ErrIDExists } + var members []*Member urls := make(map[string]bool) - for _, m := range members { + for _, m := range membersMap { + members = append(members, m) for _, u := range m.PeerURLs { urls[u] = true } @@ -339,29 +344,24 @@ func (c *RaftCluster) ValidateConfigurationChange(cc raftpb.ConfChange) error { } } - if confChangeContext.Member.IsLearner { // the new member is a learner - numLearners := 0 - for _, m := range members { - if m.IsLearner { - numLearners++ - } - } - if numLearners+1 > maxLearners { - 
return ErrTooManyLearners + if confChangeContext.Member.RaftAttributes.IsLearner && cc.Type == raftpb.ConfChangeAddLearnerNode { // the new member is a learner + scaleUpLearners := true + if err := ValidateMaxLearnerConfig(c.maxLearners, members, scaleUpLearners); err != nil { + return err } } } case raftpb.ConfChangeRemoveNode: - if members[id] == nil { + if membersMap[id] == nil { return ErrIDNotFound } case raftpb.ConfChangeUpdateNode: - if members[id] == nil { + if membersMap[id] == nil { return ErrIDNotFound } urls := make(map[string]bool) - for _, m := range members { + for _, m := range membersMap { if m.ID == id { continue } @@ -407,6 +407,7 @@ func (c *RaftCluster) AddMember(m *Member, shouldApplyV3 ShouldApplyV3) { zap.String("local-member-id", c.localID.String()), zap.String("added-peer-id", m.ID.String()), zap.Strings("added-peer-peer-urls", m.PeerURLs), + zap.Bool("added-peer-is-learner", m.IsLearner), ) } @@ -434,6 +435,7 @@ func (c *RaftCluster) RemoveMember(id types.ID, shouldApplyV3 ShouldApplyV3) { zap.String("local-member-id", c.localID.String()), zap.String("removed-remote-peer-id", id.String()), zap.Strings("removed-remote-peer-urls", m.PeerURLs), + zap.Bool("removed-remote-peer-is-learner", m.IsLearner), ) } else { c.lg.Warn( @@ -517,6 +519,7 @@ func (c *RaftCluster) UpdateRaftAttributes(id types.ID, raftAttr RaftAttributes, zap.String("local-member-id", c.localID.String()), zap.String("updated-remote-peer-id", id.String()), zap.Strings("updated-remote-peer-urls", raftAttr.PeerURLs), + zap.Bool("updated-remote-peer-is-learner", raftAttr.IsLearner), ) } @@ -831,3 +834,24 @@ func (c *RaftCluster) updateMembershipMetric(peer types.ID, known bool) { } knownPeers.WithLabelValues(c.localID.String(), peer.String()).Set(v) } + +// ValidateMaxLearnerConfig verifies the existing learner members in the cluster membership and an optional N+1 learner +// scale up are not more than maxLearners. 
+func ValidateMaxLearnerConfig(maxLearners int, members []*Member, scaleUpLearners bool) error { + numLearners := 0 + for _, m := range members { + if m.IsLearner { + numLearners++ + } + } + // Validate config can accommodate scale up. + if scaleUpLearners { + numLearners++ + } + + if numLearners > maxLearners { + return ErrTooManyLearners + } + + return nil +} diff --git a/server/etcdserver/api/membership/cluster_opts.go b/server/etcdserver/api/membership/cluster_opts.go new file mode 100644 index 00000000000..204fbf04d2c --- /dev/null +++ b/server/etcdserver/api/membership/cluster_opts.go @@ -0,0 +1,43 @@ +// Copyright 2021 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package membership + +const DefaultMaxLearners = 1 + +type ClusterOptions struct { + maxLearners int +} + +// ClusterOption are options which can be applied to the raft cluster. +type ClusterOption func(*ClusterOptions) + +func newClusterOpts(opts ...ClusterOption) *ClusterOptions { + clOpts := &ClusterOptions{} + clOpts.applyOpts(opts) + return clOpts +} + +func (co *ClusterOptions) applyOpts(opts []ClusterOption) { + for _, opt := range opts { + opt(co) + } +} + +// WithMaxLearners sets the maximum number of learners that can exist in the cluster membership. 
+func WithMaxLearners(max int) ClusterOption { + return func(co *ClusterOptions) { + co.maxLearners = max + } +} diff --git a/server/etcdserver/api/membership/cluster_test.go b/server/etcdserver/api/membership/cluster_test.go index e96e4cca34b..c8dc18c8a3f 100644 --- a/server/etcdserver/api/membership/cluster_test.go +++ b/server/etcdserver/api/membership/cluster_test.go @@ -278,10 +278,14 @@ func TestClusterValidateAndAssignIDs(t *testing.T) { } func TestClusterValidateConfigurationChange(t *testing.T) { - cl := NewCluster(zaptest.NewLogger(t)) + cl := NewCluster(zaptest.NewLogger(t), WithMaxLearners(1)) cl.SetStore(v2store.New()) for i := 1; i <= 4; i++ { - attr := RaftAttributes{PeerURLs: []string{fmt.Sprintf("http://127.0.0.1:%d", i)}} + var isLearner bool + if i == 1 { + isLearner = true + } + attr := RaftAttributes{PeerURLs: []string{fmt.Sprintf("http://127.0.0.1:%d", i)}, IsLearner: isLearner} cl.AddMember(&Member{ID: types.ID(i), RaftAttributes: attr}, true) } cl.RemoveMember(4, true) @@ -326,6 +330,17 @@ func TestClusterValidateConfigurationChange(t *testing.T) { t.Fatal(err) } + attr = RaftAttributes{PeerURLs: []string{fmt.Sprintf("http://127.0.0.1:%d", 7)}, IsLearner: true} + ctx7, err := json.Marshal(&ConfigChangeContext{Member: Member{ID: types.ID(7), RaftAttributes: attr}}) + if err != nil { + t.Fatal(err) + } + + attr = RaftAttributes{PeerURLs: []string{fmt.Sprintf("http://127.0.0.1:%d", 1)}, IsLearner: true} + ctx8, err := json.Marshal(&ConfigChangeContext{Member: Member{ID: types.ID(1), RaftAttributes: attr}, IsPromote: true}) + if err != nil { + t.Fatal(err) + } tests := []struct { cc raftpb.ConfChange werr error @@ -423,6 +438,22 @@ func TestClusterValidateConfigurationChange(t *testing.T) { }, ErrIDNotFound, }, + { + raftpb.ConfChange{ + Type: raftpb.ConfChangeAddLearnerNode, + NodeID: 7, + Context: ctx7, + }, + ErrTooManyLearners, + }, + { + raftpb.ConfChange{ + Type: raftpb.ConfChangeAddNode, + NodeID: 1, + Context: ctx8, + }, + nil, + }, } 
for i, tt := range tests { err := cl.ValidateConfigurationChange(tt.cc) diff --git a/server/etcdserver/api/membership/membership_test.go b/server/etcdserver/api/membership/membership_test.go index 221831d7b0b..62c74fc28d7 100644 --- a/server/etcdserver/api/membership/membership_test.go +++ b/server/etcdserver/api/membership/membership_test.go @@ -15,12 +15,14 @@ func TestAddRemoveMember(t *testing.T) { c := newTestCluster(t, nil) be := &backendMock{} c.SetBackend(be) - c.AddMember(newTestMember(17, nil, "node17", nil), true) + c.AddMember(newTestMemberAsLearner(17, nil, "node17", nil), true) c.RemoveMember(17, true) c.AddMember(newTestMember(18, nil, "node18", nil), true) + c.RemoveMember(18, true) // Skipping removal of already removed member c.RemoveMember(17, true) + c.RemoveMember(18, true) if false { // TODO: Enable this code when Recover is reading membership from the backend. diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index 3435289258a..d1b8bbe643e 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -236,6 +236,7 @@ func bootstrapBackend(cfg config.ServerConfig, haveWAL bool, st v2store.Store, s return nil, err } } + return &bootstrappedBackend{ beHooks: beHooks, be: be, @@ -285,7 +286,7 @@ func bootstrapExistingClusterNoWAL(cfg config.ServerConfig, prt http.RoundTrippe if err := cfg.VerifyJoinExisting(); err != nil { return nil, err } - cl, err := membership.NewClusterFromURLsMap(cfg.Logger, cfg.InitialClusterToken, cfg.InitialPeerURLsMap) + cl, err := membership.NewClusterFromURLsMap(cfg.Logger, cfg.InitialClusterToken, cfg.InitialPeerURLsMap, membership.WithMaxLearners(cfg.ExperimentalMaxLearners)) if err != nil { return nil, err } @@ -299,7 +300,10 @@ func bootstrapExistingClusterNoWAL(cfg config.ServerConfig, prt http.RoundTrippe if !isCompatibleWithCluster(cfg.Logger, cl, cl.MemberByName(cfg.Name).ID, prt) { return nil, fmt.Errorf("incompatible with current running cluster") } - + 
scaleUpLearners := false + if err := membership.ValidateMaxLearnerConfig(cfg.ExperimentalMaxLearners, existingCluster.Members(), scaleUpLearners); err != nil { + return nil, err + } remotes := existingCluster.Members() cl.SetID(types.ID(0), existingCluster.ID()) member := cl.MemberByName(cfg.Name) @@ -314,7 +318,7 @@ func bootstrapNewClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper) (* if err := cfg.VerifyBootstrap(); err != nil { return nil, err } - cl, err := membership.NewClusterFromURLsMap(cfg.Logger, cfg.InitialClusterToken, cfg.InitialPeerURLsMap) + cl, err := membership.NewClusterFromURLsMap(cfg.Logger, cfg.InitialClusterToken, cfg.InitialPeerURLsMap, membership.WithMaxLearners(cfg.ExperimentalMaxLearners)) if err != nil { return nil, err } @@ -336,7 +340,7 @@ func bootstrapNewClusterNoWAL(cfg config.ServerConfig, prt http.RoundTripper) (* if config.CheckDuplicateURL(urlsmap) { return nil, fmt.Errorf("discovery cluster %s has duplicate url", urlsmap) } - if cl, err = membership.NewClusterFromURLsMap(cfg.Logger, cfg.InitialClusterToken, urlsmap); err != nil { + if cl, err = membership.NewClusterFromURLsMap(cfg.Logger, cfg.InitialClusterToken, urlsmap, membership.WithMaxLearners(cfg.ExperimentalMaxLearners)); err != nil { return nil, err } } @@ -358,7 +362,13 @@ func bootstrapClusterWithWAL(cfg config.ServerConfig, meta *snapshotMetadata) (* zap.String("wal-dir", cfg.WALDir()), ) } - cl := membership.NewCluster(cfg.Logger) + cl := membership.NewCluster(cfg.Logger, membership.WithMaxLearners(cfg.ExperimentalMaxLearners)) + + scaleUpLearners := false + if err := membership.ValidateMaxLearnerConfig(cfg.ExperimentalMaxLearners, cl.Members(), scaleUpLearners); err != nil { + return nil, err + } + cl.SetID(meta.nodeID, meta.clusterID) return &bootstrapedCluster{ cl: cl, @@ -440,7 +450,8 @@ func (c *bootstrapedCluster) Finalize(cfg config.ServerConfig, s *bootstrappedSt return fmt.Errorf("database file (%v) of the backend is missing", bepath) } } - return 
nil + scaleUpLearners := false + return membership.ValidateMaxLearnerConfig(cfg.ExperimentalMaxLearners, c.cl.Members(), scaleUpLearners) } func (c *bootstrapedCluster) databaseFileMissing(s *bootstrappedStorage) bool { diff --git a/server/etcdserver/bootstrap_test.go b/server/etcdserver/bootstrap_test.go new file mode 100644 index 00000000000..8d6effd450f --- /dev/null +++ b/server/etcdserver/bootstrap_test.go @@ -0,0 +1,140 @@ +// Copyright 2021 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package version implements etcd version parsing and contains latest version +// information. 
+ +package etcdserver + +import ( + "encoding/json" + "io" + "net/http" + "strings" + "testing" + + "go.etcd.io/etcd/api/v3/etcdserverpb" + "go.etcd.io/etcd/api/v3/version" + "go.etcd.io/etcd/client/pkg/v3/types" + "go.etcd.io/etcd/server/v3/config" + "go.etcd.io/etcd/server/v3/etcdserver/api/membership" + "go.uber.org/zap" +) + +func TestBootstrapExistingClusterNoWALMaxLearner(t *testing.T) { + tests := []struct { + name string + members []etcdserverpb.Member + maxLearner int + hasError bool + expectedError error + }{ + { + name: "bootstrap success: maxLearner gt learner count", + members: []etcdserverpb.Member{ + {ID: 4512484362714696085, PeerURLs: []string{"http://localhost:2380"}}, + {ID: 5321713336100798248, PeerURLs: []string{"http://localhost:2381"}}, + {ID: 5670219998796287055, PeerURLs: []string{"http://localhost:2382"}}, + }, + maxLearner: 1, + hasError: false, + expectedError: nil, + }, + { + name: "bootstrap success: maxLearner eq learner count", + members: []etcdserverpb.Member{ + {ID: 4512484362714696085, PeerURLs: []string{"http://localhost:2380"}, IsLearner: true}, + {ID: 5321713336100798248, PeerURLs: []string{"http://localhost:2381"}}, + {ID: 5670219998796287055, PeerURLs: []string{"http://localhost:2382"}, IsLearner: true}, + }, + maxLearner: 2, + hasError: false, + expectedError: nil, + }, + { + name: "bootstrap fail: maxLearner lt learner count", + members: []etcdserverpb.Member{ + {ID: 4512484362714696085, PeerURLs: []string{"http://localhost:2380"}}, + {ID: 5321713336100798248, PeerURLs: []string{"http://localhost:2381"}, IsLearner: true}, + {ID: 5670219998796287055, PeerURLs: []string{"http://localhost:2382"}, IsLearner: true}, + }, + maxLearner: 1, + hasError: true, + expectedError: membership.ErrTooManyLearners, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cluster, err := types.NewURLsMap("node0=http://localhost:2380,node1=http://localhost:2381,node2=http://localhost:2382") + if err != nil { + 
t.Fatalf("unexpected error: %v", err) + } + cfg := config.ServerConfig{ + Name: "node0", + InitialPeerURLsMap: cluster, + Logger: zap.NewExample(), + ExperimentalMaxLearners: tt.maxLearner, + } + _, err = bootstrapExistingClusterNoWAL(cfg, mockBootstrapRoundTrip(tt.members)) + hasError := err != nil + if hasError != tt.hasError { + t.Errorf("expected error: %v got: %v", tt.hasError, err) + } + if hasError && !strings.Contains(err.Error(), tt.expectedError.Error()) { + t.Fatalf("expected error to contain: %q, got: %q", tt.expectedError.Error(), err.Error()) + } + }) + } +} + +type roundTripFunc func(r *http.Request) (*http.Response, error) + +func (s roundTripFunc) RoundTrip(r *http.Request) (*http.Response, error) { + return s(r) +} + +func mockBootstrapRoundTrip(members []etcdserverpb.Member) roundTripFunc { + return func(r *http.Request) (*http.Response, error) { + switch { + case strings.Contains(r.URL.String(), "/members"): + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(strings.NewReader(mockMembersJSON(members))), + Header: http.Header{"X-Etcd-Cluster-Id": []string{"f4588138892a16b0"}}, + }, nil + case strings.Contains(r.URL.String(), "/version"): + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(strings.NewReader(mockVersionJSON())), + }, nil + case strings.Contains(r.URL.String(), DowngradeEnabledPath): + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(strings.NewReader(`true`)), + }, nil + } + return nil, nil + } +} + +func mockVersionJSON() string { + v := version.Versions{Server: "3.7.0", Cluster: "3.7.0"} + version, _ := json.Marshal(v) + return string(version) +} + +func mockMembersJSON(m []etcdserverpb.Member) string { + members, _ := json.Marshal(m) + return string(members) +} diff --git a/tests/framework/integration/cluster.go b/tests/framework/integration/cluster.go index 6087de09ee4..631e00689bc 100644 --- a/tests/framework/integration/cluster.go +++ 
b/tests/framework/integration/cluster.go @@ -45,6 +45,7 @@ import ( "go.etcd.io/etcd/server/v3/embed" "go.etcd.io/etcd/server/v3/etcdserver" "go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp" + "go.etcd.io/etcd/server/v3/etcdserver/api/membership" "go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp" "go.etcd.io/etcd/server/v3/etcdserver/api/v2http" "go.etcd.io/etcd/server/v3/etcdserver/api/v3client" @@ -169,6 +170,7 @@ type ClusterConfig struct { LeaseCheckpointInterval time.Duration WatchProgressNotifyInterval time.Duration + ExperimentalMaxLearners int } type Cluster struct { @@ -330,6 +332,7 @@ func (c *Cluster) mustNewMember(t testutil.TB, memberNumber int64) *Member { EnableLeaseCheckpoint: c.Cfg.EnableLeaseCheckpoint, LeaseCheckpointInterval: c.Cfg.LeaseCheckpointInterval, WatchProgressNotifyInterval: c.Cfg.WatchProgressNotifyInterval, + ExperimentalMaxLearners: c.Cfg.ExperimentalMaxLearners, }) m.DiscoveryURL = c.Cfg.DiscoveryURL if c.Cfg.UseGRPC { @@ -632,6 +635,7 @@ type MemberConfig struct { EnableLeaseCheckpoint bool LeaseCheckpointInterval time.Duration WatchProgressNotifyInterval time.Duration + ExperimentalMaxLearners int } // MustNewMember return an inited member with the given name. 
If peerTLS is @@ -735,7 +739,10 @@ func MustNewMember(t testutil.TB, mcfg MemberConfig) *Member { m.InitialCorruptCheck = true m.WarningApplyDuration = embed.DefaultWarningApplyDuration m.WarningUnaryRequestDuration = embed.DefaultWarningUnaryRequestDuration - + m.ExperimentalMaxLearners = membership.DefaultMaxLearners + if mcfg.ExperimentalMaxLearners != 0 { + m.ExperimentalMaxLearners = mcfg.ExperimentalMaxLearners + } m.V2Deprecation = config.V2_DEPR_DEFAULT m.GrpcServerRecorder = &grpc_testing.GrpcRecorder{} m.Logger = memberLogger(t, mcfg.Name) diff --git a/tests/integration/clientv3/cluster_test.go b/tests/integration/clientv3/cluster_test.go index b9a63c55c90..edf47f29b4f 100644 --- a/tests/integration/clientv3/cluster_test.go +++ b/tests/integration/clientv3/cluster_test.go @@ -16,6 +16,7 @@ package clientv3test import ( "context" + "fmt" "math/rand" "reflect" "strings" @@ -376,18 +377,28 @@ func TestMemberPromoteMemberNotExist(t *testing.T) { } } -// TestMaxLearnerInCluster verifies that the maximum number of learners allowed in a cluster is 1 +// TestMaxLearnerInCluster verifies that the maximum number of learners allowed in a cluster is configurable func TestMaxLearnerInCluster(t *testing.T) { integration2.BeforeTest(t) - // 1. start with a cluster with 3 voting member and 0 learner member - clus := integration2.NewClusterV3(t, &integration2.ClusterConfig{Size: 3}) + // 1. start with a cluster with 3 voting member and max learner 2 + clus := integration2.NewClusterV3(t, &integration2.ClusterConfig{Size: 3, ExperimentalMaxLearners: 2}) defer clus.Terminate(t) - // 2. adding a learner member should succeed - resp1, err := clus.Client(0).MemberAddAsLearner(context.Background(), []string{"http://127.0.0.1:1234"}) + // 2.
adding 2 learner members should succeed + for i := 0; i < 2; i++ { + _, err := clus.Client(0).MemberAddAsLearner(context.Background(), []string{fmt.Sprintf("http://127.0.0.1:123%d", i)}) + if err != nil { + t.Fatalf("failed to add learner member %v", err) + } + } + + // ensure client endpoint is voting member + leaderIdx := clus.WaitLeader(t) + capi := clus.Client(leaderIdx) + resp1, err := capi.MemberList(context.Background()) if err != nil { - t.Fatalf("failed to add learner member %v", err) + t.Fatalf("failed to get member list") } numberOfLearners := 0 for _, m := range resp1.Members { @@ -395,12 +406,12 @@ func TestMaxLearnerInCluster(t *testing.T) { numberOfLearners++ } } - if numberOfLearners != 1 { - t.Fatalf("Added 1 learner node to cluster, got %d", numberOfLearners) + if numberOfLearners != 2 { + t.Fatalf("added 2 learner node to cluster, got %d", numberOfLearners) } - // 3. cluster has 3 voting member and 1 learner, adding another learner should fail - _, err = clus.Client(0).MemberAddAsLearner(context.Background(), []string{"http://127.0.0.1:2345"}) + // 3. cluster has 3 voting member and 2 learner, adding another learner should fail + _, err = clus.Client(0).MemberAddAsLearner(context.Background(), []string{"http://127.0.0.1:2342"}) if err == nil { t.Fatalf("expect member add to fail, got no error") } @@ -410,7 +421,7 @@ func TestMaxLearnerInCluster(t *testing.T) { } // 4. cluster has 3 voting member and 1 learner, adding a voting member should succeed - _, err = clus.Client(0).MemberAdd(context.Background(), []string{"http://127.0.0.1:3456"}) + _, err = clus.Client(0).MemberAdd(context.Background(), []string{"http://127.0.0.1:3453"}) if err != nil { t.Errorf("failed to add member %v", err) }