diff --git a/CHANGELOG-3.4.md b/CHANGELOG-3.4.md index d142eda2914..38730f1f4b8 100644 --- a/CHANGELOG-3.4.md +++ b/CHANGELOG-3.4.md @@ -32,7 +32,7 @@ See [code changes](https://github.com/coreos/etcd/compare/v3.3.0...v3.4.0) and [ - Make [Lease `Lookup` non-blocking with concurrent `Grant`/`Revoke`](https://github.com/coreos/etcd/pull/9229). - Make etcd server return `raft.ErrProposalDropped` on internal Raft proposal drop in [v3 applier](https://github.com/coreos/etcd/pull/9549) and [v2 applier](https://github.com/coreos/etcd/pull/9558). - e.g. a node is removed from cluster, or [`raftpb.MsgProp` arrives at current leader while there is an ongoing leadership transfer](https://github.com/coreos/etcd/issues/8975). -- Improve [functional tester](https://github.com/coreos/etcd/tree/master/functional) coverage: [proxy layer to run network fault tests in CI](https://github.com/coreos/etcd/pull/9081), [TLS is enabled both for server and client](https://github.com/coreos/etcd/pull/9534), [liveness mode](https://github.com/coreos/etcd/issues/9230), [shuffle test sequence](https://github.com/coreos/etcd/issues/9381), [membership reconfiguration failure cases](https://github.com/coreos/etcd/pull/9564), [disastrous quorum loss and snapshot recovery](TODO). +- Improve [functional tester](https://github.com/coreos/etcd/tree/master/functional) coverage: [proxy layer to run network fault tests in CI](https://github.com/coreos/etcd/pull/9081), [TLS is enabled both for server and client](https://github.com/coreos/etcd/pull/9534), [liveness mode](https://github.com/coreos/etcd/issues/9230), [shuffle test sequence](https://github.com/coreos/etcd/issues/9381), [membership reconfiguration failure cases](https://github.com/coreos/etcd/pull/9564), [disastrous quorum loss and snapshot recovery from a seed member](https://github.com/coreos/etcd/pull/9565). 
### Breaking Changes diff --git a/functional.yaml b/functional.yaml index 44583a12229..2caee689bea 100644 --- a/functional.yaml +++ b/functional.yaml @@ -2,15 +2,15 @@ agent-configs: - etcd-exec-path: ./bin/etcd agent-addr: 127.0.0.1:19027 failpoint-http-addr: http://127.0.0.1:7381 - base-dir: /tmp/etcd-agent-data-1 - etcd-log-path: /tmp/etcd-agent-data-1/current-etcd.log + base-dir: /tmp/etcd-functional-1 + etcd-log-path: /tmp/etcd-functional-1/etcd.log etcd-client-proxy: false etcd-peer-proxy: true etcd-client-endpoint: 127.0.0.1:1379 etcd: name: s1 - data-dir: /tmp/etcd-agent-data-1/etcd.data - wal-dir: /tmp/etcd-agent-data-1/etcd.data/member/wal + data-dir: /tmp/etcd-functional-1/etcd.data + wal-dir: /tmp/etcd-functional-1/etcd.data/member/wal heartbeat-interval: 100 election-timeout: 1000 listen-client-urls: ["https://127.0.0.1:1379"] @@ -34,18 +34,32 @@ agent-configs: quota-backend-bytes: 10740000000 # 10 GiB pre-vote: true initial-corrupt-check: true + client-cert-data: "" + client-cert-path: "" + client-key-data: "" + client-key-path: "" + client-trusted-ca-data: "" + client-trusted-ca-path: "" + peer-cert-data: "" + peer-cert-path: "" + peer-key-data: "" + peer-key-path: "" + peer-trusted-ca-data: "" + peer-trusted-ca-path: "" + snapshot-path: /tmp/etcd-functional-1.snapshot.db + - etcd-exec-path: ./bin/etcd agent-addr: 127.0.0.1:29027 failpoint-http-addr: http://127.0.0.1:7382 - base-dir: /tmp/etcd-agent-data-2 - etcd-log-path: /tmp/etcd-agent-data-2/current-etcd.log + base-dir: /tmp/etcd-functional-2 + etcd-log-path: /tmp/etcd-functional-2/etcd.log etcd-client-proxy: false etcd-peer-proxy: true etcd-client-endpoint: 127.0.0.1:2379 etcd: name: s2 - data-dir: /tmp/etcd-agent-data-2/etcd.data - wal-dir: /tmp/etcd-agent-data-2/etcd.data/member/wal + data-dir: /tmp/etcd-functional-2/etcd.data + wal-dir: /tmp/etcd-functional-2/etcd.data/member/wal heartbeat-interval: 100 election-timeout: 1000 listen-client-urls: ["https://127.0.0.1:2379"] @@ -69,18 +83,32 @@ agent-configs: quota-backend-bytes: 10740000000 # 10 GiB pre-vote: true initial-corrupt-check: true + client-cert-data: "" + client-cert-path: "" + client-key-data: "" + client-key-path: "" + client-trusted-ca-data: "" + client-trusted-ca-path: "" + peer-cert-data: "" + peer-cert-path: "" + peer-key-data: "" + peer-key-path: "" + peer-trusted-ca-data: "" + peer-trusted-ca-path: "" + snapshot-path: /tmp/etcd-functional-2.snapshot.db + - etcd-exec-path: ./bin/etcd agent-addr: 127.0.0.1:39027 failpoint-http-addr: http://127.0.0.1:7383 - base-dir: /tmp/etcd-agent-data-3 - etcd-log-path: /tmp/etcd-agent-data-3/current-etcd.log + base-dir: /tmp/etcd-functional-3 + etcd-log-path: /tmp/etcd-functional-3/etcd.log etcd-client-proxy: false etcd-peer-proxy: true etcd-client-endpoint: 127.0.0.1:3379 etcd: name: s3 - data-dir: /tmp/etcd-agent-data-3/etcd.data - wal-dir: /tmp/etcd-agent-data-3/etcd.data/member/wal + data-dir: /tmp/etcd-functional-3/etcd.data + wal-dir: /tmp/etcd-functional-3/etcd.data/member/wal heartbeat-interval: 100 election-timeout: 1000 listen-client-urls: ["https://127.0.0.1:3379"] @@ -104,6 +132,19 @@ agent-configs: quota-backend-bytes: 10740000000 # 10 GiB pre-vote: true initial-corrupt-check: true + client-cert-data: "" + client-cert-path: "" + client-key-data: "" + client-key-path: "" + client-trusted-ca-data: "" + client-trusted-ca-path: "" + peer-cert-data: "" + peer-cert-path: "" + peer-key-data: "" + peer-key-path: "" + peer-trusted-ca-data: "" + peer-trusted-ca-path: "" + snapshot-path: 
/tmp/etcd-functional-3.snapshot.db tester-config: data-dir: /tmp/etcd-tester-data @@ -116,15 +157,14 @@ tester-config: round-limit: 1 exit-on-failure: true - consistency-check: true enable-pprof: true - failure-delay-ms: 7000 - failure-shuffle: true + case-delay-ms: 7000 + case-shuffle: true # For full descriptions, - # https://godoc.org/github.com/coreos/etcd/functional/rpcpb#FailureCase - failure-cases: + # https://godoc.org/github.com/coreos/etcd/functional/rpcpb#Case + cases: - SIGTERM_ONE_FOLLOWER - SIGTERM_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT - SIGTERM_LEADER @@ -153,9 +193,9 @@ tester-config: - RANDOM_DELAY_PEER_PORT_TX_RX_ALL - NO_FAIL_WITH_STRESS - NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS - # - SIGQUIT_AND_REMOVE_LEADER # - SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT + # - SIGQUIT_AND_REMOVE_QUORUM_AND_RESTORE_LEADER_SNAPSHOT_FROM_SCRATCH failpoint-commands: - panic("etcd-tester") @@ -164,7 +204,7 @@ tester-config: runner-exec-path: ./bin/etcd-runner external-exec-path: "" - stress-types: + stressers: - KV - LEASE # - ELECTION_RUNNER @@ -172,6 +212,10 @@ tester-config: # - LOCK_RACER_RUNNER # - LEASE_RUNNER + checkers: + - KV_HASH + - LEASE_EXPIRE + stress-key-size: 100 stress-key-size-large: 32769 stress-key-suffix-range: 250000 diff --git a/functional/README.md b/functional/README.md index 45e810006bf..f4b8cb1d72a 100644 --- a/functional/README.md +++ b/functional/README.md @@ -2,7 +2,7 @@ [`functional`](https://godoc.org/github.com/coreos/etcd/functional) verifies the correct behavior of etcd under various system and network malfunctions. It sets up an etcd cluster under high pressure loads and continuously injects failures into the cluster. Then it expects the etcd cluster to recover within a few seconds. This has been extremely helpful to find critical bugs. -See [`rpcpb.FailureCase`](https://godoc.org/github.com/coreos/etcd/functional/rpcpb#FailureCase) for all failure cases. +See [`rpcpb.Case`](https://godoc.org/github.com/coreos/etcd/functional/rpcpb#Case) for all failure cases. See [functional.yaml](https://github.com/coreos/etcd/blob/master/functional.yaml) for an example configuration. 
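The agent and member changes below wire three new operations (SAVE_SNAPSHOT, RESTORE_RESTART_FROM_SNAPSHOT, RESTART_FROM_SNAPSHOT) to new `rpcpb.Member` methods. A minimal sketch of driving `Member.SaveSnapshot` directly, assuming an etcd member is already serving on the client endpoint; the endpoint and snapshot path are illustrative values borrowed from functional.yaml, not part of the API:

package main

import (
	"github.com/coreos/etcd/functional/rpcpb"

	"go.uber.org/zap"
)

func main() {
	lg, _ := zap.NewProduction()
	m := &rpcpb.Member{
		EtcdClientEndpoint: "127.0.0.1:1379",                     // assumed running member
		SnapshotPath:       "/tmp/etcd-functional-1.snapshot.db", // where the .db file lands
		Etcd: &rpcpb.Etcd{
			Name:                "s1",
			AdvertiseClientURLs: []string{"https://127.0.0.1:1379"},
		},
	}
	// SaveSnapshot dials the member, saves its backend snapshot to
	// SnapshotPath, and records file size, hash, and revision in m.SnapshotInfo.
	if err := m.SaveSnapshot(lg); err != nil {
		lg.Fatal("snapshot save failed", zap.Error(err))
	}
	lg.Info("snapshot saved",
		zap.String("path", m.SnapshotInfo.SnapshotPath),
		zap.Int64("revision", m.SnapshotInfo.SnapshotRevision),
	)
}

The tester reaches the same code path remotely by sending an rpcpb.Request with Operation_SAVE_SNAPSHOT to the agent, which replies with the populated SnapshotInfo.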
diff --git a/functional/agent/handler.go b/functional/agent/handler.go index e8eb4af70c5..7cd8e6cec35 100644 --- a/functional/agent/handler.go +++ b/functional/agent/handler.go @@ -57,6 +57,13 @@ func (srv *Server) handleTesterRequest(req *rpcpb.Request) (resp *rpcpb.Response case rpcpb.Operation_SIGQUIT_ETCD_AND_REMOVE_DATA: return srv.handle_SIGQUIT_ETCD_AND_REMOVE_DATA() + case rpcpb.Operation_SAVE_SNAPSHOT: + return srv.handle_SAVE_SNAPSHOT() + case rpcpb.Operation_RESTORE_RESTART_FROM_SNAPSHOT: + return srv.handle_RESTORE_RESTART_FROM_SNAPSHOT() + case rpcpb.Operation_RESTART_FROM_SNAPSHOT: + return srv.handle_RESTART_FROM_SNAPSHOT() + case rpcpb.Operation_SIGQUIT_ETCD_AND_ARCHIVE_DATA: return srv.handle_SIGQUIT_ETCD_AND_ARCHIVE_DATA() case rpcpb.Operation_SIGQUIT_ETCD_AND_REMOVE_DATA_AND_STOP_AGENT: @@ -96,7 +103,7 @@ func (srv *Server) handle_INITIAL_START_ETCD(req *rpcpb.Request) (*rpcpb.Respons return nil, err } - srv.creatEtcdCmd() + srv.creatEtcdCmd(false) if err = srv.saveTLSAssets(); err != nil { return nil, err @@ -225,8 +232,11 @@ func (srv *Server) createEtcdLogFile() error { return nil } -func (srv *Server) creatEtcdCmd() { +func (srv *Server) creatEtcdCmd(fromSnapshot bool) { etcdPath, etcdFlags := srv.Member.EtcdExecPath, srv.Member.Etcd.Flags() + if fromSnapshot { + etcdFlags = srv.Member.EtcdOnSnapshotRestore.Flags() + } u, _ := url.Parse(srv.Member.FailpointHTTPAddr) srv.lg.Info("creating etcd command", zap.String("etcd-exec-path", etcdPath), @@ -416,7 +426,7 @@ func (srv *Server) handle_RESTART_ETCD() (*rpcpb.Response, error) { } } - srv.creatEtcdCmd() + srv.creatEtcdCmd(false) if err = srv.saveTLSAssets(); err != nil { return nil, err @@ -502,6 +512,60 @@ func (srv *Server) handle_SIGQUIT_ETCD_AND_REMOVE_DATA() (*rpcpb.Response, error }, nil } +func (srv *Server) handle_SAVE_SNAPSHOT() (*rpcpb.Response, error) { + err := srv.Member.SaveSnapshot(srv.lg) + if err != nil { + return nil, err + } + return &rpcpb.Response{ + Success: true, + Status: "saved snapshot", + SnapshotInfo: srv.Member.SnapshotInfo, + }, nil +} + +func (srv *Server) handle_RESTORE_RESTART_FROM_SNAPSHOT() (resp *rpcpb.Response, err error) { + err = srv.Member.RestoreSnapshot(srv.lg) + if err != nil { + return nil, err + } + resp, err = srv.handle_RESTART_FROM_SNAPSHOT() + if resp != nil && err == nil { + resp.Status = "restored snapshot and " + resp.Status + } + return resp, err +} + +func (srv *Server) handle_RESTART_FROM_SNAPSHOT() (resp *rpcpb.Response, err error) { + srv.creatEtcdCmd(true) + + if err = srv.saveTLSAssets(); err != nil { + return nil, err + } + if err = srv.startEtcdCmd(); err != nil { + return nil, err + } + srv.lg.Info("restarted etcd", zap.String("command-path", srv.etcdCmd.Path)) + if err = srv.loadAutoTLSAssets(); err != nil { + return nil, err + } + + // wait some time for etcd listener start + // before setting up proxy + // TODO: local tests should handle port conflicts + // with clients on restart + time.Sleep(time.Second) + if err = srv.startProxy(); err != nil { + return nil, err + } + + return &rpcpb.Response{ + Success: true, + Status: "restarted etcd from snapshot", + SnapshotInfo: srv.Member.SnapshotInfo, + }, nil +} + func (srv *Server) handle_SIGQUIT_ETCD_AND_ARCHIVE_DATA() (*rpcpb.Response, error) { srv.stopProxy() diff --git a/functional/rpcpb/member.go b/functional/rpcpb/member.go index fede9f00c8c..ce49632bee4 100644 --- a/functional/rpcpb/member.go +++ b/functional/rpcpb/member.go @@ -18,16 +18,26 @@ import ( "context" "fmt" "net/url" + "os" "time" 
"github.com/coreos/etcd/clientv3" pb "github.com/coreos/etcd/etcdserver/etcdserverpb" "github.com/coreos/etcd/pkg/transport" + "github.com/coreos/etcd/pkg/types" + "github.com/coreos/etcd/snapshot" + "github.com/dustin/go-humanize" + "go.uber.org/zap" grpc "google.golang.org/grpc" "google.golang.org/grpc/credentials" ) +// ElectionTimeout returns an election timeout duration. +func (m *Member) ElectionTimeout() time.Duration { + return time.Duration(m.Etcd.ElectionTimeoutMs) * time.Millisecond +} + // DialEtcdGRPCServer creates a raw gRPC connection to an etcd member. func (m *Member) DialEtcdGRPCServer(opts ...grpc.DialOption) (*grpc.ClientConn, error) { dialOpts := []grpc.DialOption{ @@ -85,7 +95,7 @@ func (m *Member) CreateEtcdClient(opts ...grpc.DialOption) (*clientv3.Client, er cfg := clientv3.Config{ Endpoints: []string{m.EtcdClientEndpoint}, - DialTimeout: 5 * time.Second, + DialTimeout: 10 * time.Second, DialOptions: opts, } if secure { @@ -227,3 +237,126 @@ func (m *Member) WriteHealthKey() error { } return nil } + +// SaveSnapshot downloads a snapshot file from this member, locally. +// It's meant to requested remotely, so that local member can store +// snapshot file on local disk. +func (m *Member) SaveSnapshot(lg *zap.Logger) (err error) { + // remove existing snapshot first + if err = os.RemoveAll(m.SnapshotPath); err != nil { + return err + } + + var cli *clientv3.Client + cli, err = m.CreateEtcdClient() + if err != nil { + return fmt.Errorf("%v (%q)", err, m.EtcdClientEndpoint) + } + defer cli.Close() + + lg.Info( + "snapshot save START", + zap.String("member-name", m.Etcd.Name), + zap.Strings("member-client-urls", m.Etcd.AdvertiseClientURLs), + zap.String("snapshot-path", m.SnapshotPath), + ) + now := time.Now() + mgr := snapshot.NewV3(cli, lg) + if err = mgr.Save(context.Background(), m.SnapshotPath); err != nil { + return err + } + took := time.Since(now) + + var fi os.FileInfo + fi, err = os.Stat(m.SnapshotPath) + if err != nil { + return err + } + var st snapshot.Status + st, err = mgr.Status(m.SnapshotPath) + if err != nil { + return err + } + m.SnapshotInfo = &SnapshotInfo{ + MemberName: m.Etcd.Name, + MemberClientURLs: m.Etcd.AdvertiseClientURLs, + SnapshotPath: m.SnapshotPath, + SnapshotFileSize: humanize.Bytes(uint64(fi.Size())), + SnapshotTotalSize: humanize.Bytes(uint64(st.TotalSize)), + SnapshotTotalKey: int64(st.TotalKey), + SnapshotHash: int64(st.Hash), + SnapshotRevision: st.Revision, + Took: fmt.Sprintf("%v", took), + } + lg.Info( + "snapshot save END", + zap.String("member-name", m.SnapshotInfo.MemberName), + zap.Strings("member-client-urls", m.SnapshotInfo.MemberClientURLs), + zap.String("snapshot-path", m.SnapshotPath), + zap.String("snapshot-file-size", m.SnapshotInfo.SnapshotFileSize), + zap.String("snapshot-total-size", m.SnapshotInfo.SnapshotTotalSize), + zap.Int64("snapshot-total-key", m.SnapshotInfo.SnapshotTotalKey), + zap.Int64("snapshot-hash", m.SnapshotInfo.SnapshotHash), + zap.Int64("snapshot-revision", m.SnapshotInfo.SnapshotRevision), + zap.String("took", m.SnapshotInfo.Took), + ) + return nil +} + +// RestoreSnapshot restores a cluster from a given snapshot file on disk. +// It's meant to requested remotely, so that local member can load the +// snapshot file from local disk. 
+func (m *Member) RestoreSnapshot(lg *zap.Logger) (err error) { + if err = os.RemoveAll(m.EtcdOnSnapshotRestore.DataDir); err != nil { + return err + } + if err = os.RemoveAll(m.EtcdOnSnapshotRestore.WALDir); err != nil { + return err + } + + var initialCluster types.URLsMap + initialCluster, err = types.NewURLsMap(m.EtcdOnSnapshotRestore.InitialCluster) + if err != nil { + return err + } + var peerURLs types.URLs + peerURLs, err = types.NewURLs(m.EtcdOnSnapshotRestore.AdvertisePeerURLs) + if err != nil { + return err + } + + lg.Info( + "snapshot restore START", + zap.String("member-name", m.Etcd.Name), + zap.Strings("member-client-urls", m.Etcd.AdvertiseClientURLs), + zap.String("snapshot-path", m.SnapshotPath), + ) + now := time.Now() + mgr := snapshot.NewV3(nil, lg) + err = mgr.Restore(m.SnapshotInfo.SnapshotPath, snapshot.RestoreConfig{ + Name: m.EtcdOnSnapshotRestore.Name, + OutputDataDir: m.EtcdOnSnapshotRestore.DataDir, + OutputWALDir: m.EtcdOnSnapshotRestore.WALDir, + InitialCluster: initialCluster, + InitialClusterToken: m.EtcdOnSnapshotRestore.InitialClusterToken, + PeerURLs: peerURLs, + SkipHashCheck: false, + + // TODO: SkipHashCheck == true, for recovery from existing db file + }) + took := time.Since(now) + lg.Info( + "snapshot restore END", + zap.String("member-name", m.SnapshotInfo.MemberName), + zap.Strings("member-client-urls", m.SnapshotInfo.MemberClientURLs), + zap.String("snapshot-path", m.SnapshotPath), + zap.String("snapshot-file-size", m.SnapshotInfo.SnapshotFileSize), + zap.String("snapshot-total-size", m.SnapshotInfo.SnapshotTotalSize), + zap.Int64("snapshot-total-key", m.SnapshotInfo.SnapshotTotalKey), + zap.Int64("snapshot-hash", m.SnapshotInfo.SnapshotHash), + zap.Int64("snapshot-revision", m.SnapshotInfo.SnapshotRevision), + zap.String("took", took.String()), + zap.Error(err), + ) + return err +} diff --git a/functional/rpcpb/rpc.pb.go b/functional/rpcpb/rpc.pb.go index c679e76d2c8..1c488dc4733 100644 --- a/functional/rpcpb/rpc.pb.go +++ b/functional/rpcpb/rpc.pb.go @@ -9,6 +9,7 @@ It has these top-level messages: Request + SnapshotInfo Response Member Tester @@ -52,12 +53,23 @@ const ( // SIGQUIT_ETCD_AND_REMOVE_DATA kills etcd process and removes all data // directories to simulate destroying the whole machine. Operation_SIGQUIT_ETCD_AND_REMOVE_DATA Operation = 21 + // SAVE_SNAPSHOT is sent to trigger the local member to download its snapshot + // onto its local disk at the path specified by the tester. + Operation_SAVE_SNAPSHOT Operation = 30 + // RESTORE_RESTART_FROM_SNAPSHOT is sent to trigger the local member to + // restore a cluster from an existing snapshot on disk, and restart + // an etcd instance from the recovered data. + Operation_RESTORE_RESTART_FROM_SNAPSHOT Operation = 31 + // RESTART_FROM_SNAPSHOT is sent to trigger the local member to restart + // and join an existing cluster that has been recovered from a snapshot. + // Local member joins this cluster with fresh data. + Operation_RESTART_FROM_SNAPSHOT Operation = 32 // SIGQUIT_ETCD_AND_ARCHIVE_DATA is sent when consistency check failed, // thus need to archive etcd data directories. - Operation_SIGQUIT_ETCD_AND_ARCHIVE_DATA Operation = 30 + Operation_SIGQUIT_ETCD_AND_ARCHIVE_DATA Operation = 40 // SIGQUIT_ETCD_AND_REMOVE_DATA_AND_STOP_AGENT destroys etcd process, // etcd data, and agent server. 
- Operation_SIGQUIT_ETCD_AND_REMOVE_DATA_AND_STOP_AGENT Operation = 31 + Operation_SIGQUIT_ETCD_AND_REMOVE_DATA_AND_STOP_AGENT Operation = 41 // BLACKHOLE_PEER_PORT_TX_RX drops all outgoing/incoming packets from/to // the peer port on target member's peer port. Operation_BLACKHOLE_PEER_PORT_TX_RX Operation = 100 @@ -76,8 +88,11 @@ var Operation_name = map[int32]string{ 11: "RESTART_ETCD", 20: "SIGTERM_ETCD", 21: "SIGQUIT_ETCD_AND_REMOVE_DATA", - 30: "SIGQUIT_ETCD_AND_ARCHIVE_DATA", - 31: "SIGQUIT_ETCD_AND_REMOVE_DATA_AND_STOP_AGENT", + 30: "SAVE_SNAPSHOT", + 31: "RESTORE_RESTART_FROM_SNAPSHOT", + 32: "RESTART_FROM_SNAPSHOT", + 40: "SIGQUIT_ETCD_AND_ARCHIVE_DATA", + 41: "SIGQUIT_ETCD_AND_REMOVE_DATA_AND_STOP_AGENT", 100: "BLACKHOLE_PEER_PORT_TX_RX", 101: "UNBLACKHOLE_PEER_PORT_TX_RX", 200: "DELAY_PEER_PORT_TX_RX", @@ -89,8 +104,11 @@ var Operation_value = map[string]int32{ "RESTART_ETCD": 11, "SIGTERM_ETCD": 20, "SIGQUIT_ETCD_AND_REMOVE_DATA": 21, - "SIGQUIT_ETCD_AND_ARCHIVE_DATA": 30, - "SIGQUIT_ETCD_AND_REMOVE_DATA_AND_STOP_AGENT": 31, + "SAVE_SNAPSHOT": 30, + "RESTORE_RESTART_FROM_SNAPSHOT": 31, + "RESTART_FROM_SNAPSHOT": 32, + "SIGQUIT_ETCD_AND_ARCHIVE_DATA": 40, + "SIGQUIT_ETCD_AND_REMOVE_DATA_AND_STOP_AGENT": 41, "BLACKHOLE_PEER_PORT_TX_RX": 100, "UNBLACKHOLE_PEER_PORT_TX_RX": 101, "DELAY_PEER_PORT_TX_RX": 200, @@ -102,18 +120,18 @@ func (x Operation) String() string { } func (Operation) EnumDescriptor() ([]byte, []int) { return fileDescriptorRpc, []int{0} } -// FailureCase defines various system faults in distributed systems, +// Case defines various system faults or test case in distributed systems, // in order to verify correct behavior of etcd servers and clients. -type FailureCase int32 +type Case int32 const ( // SIGTERM_ONE_FOLLOWER stops a randomly chosen follower (non-leader) // but does not delete its data directories on disk for next restart. - // It waits "failure-delay-ms" before recovering this failure. + // It waits "delay-ms" before recovering this failure. // The expected behavior is that the follower comes back online // and rejoins the cluster, and then each member continues to process // client requests ('Put' request that requires Raft consensus). - FailureCase_SIGTERM_ONE_FOLLOWER FailureCase = 0 + Case_SIGTERM_ONE_FOLLOWER Case = 0 // SIGTERM_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT stops a randomly chosen // follower but does not delete its data directories on disk for next // restart. And waits until most up-to-date node (leader) applies the @@ -123,16 +141,15 @@ const ( // to the follower to force it to follow the leader's log. // As always, after recovery, each member must be able to process // client requests. - FailureCase_SIGTERM_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT FailureCase = 1 + Case_SIGTERM_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT Case = 1 // SIGTERM_LEADER stops the active leader node but does not delete its - // data directories on disk for next restart. Then it waits - // "failure-delay-ms" before recovering this failure, in order to - // trigger election timeouts. + // data directories on disk for next restart. Then it waits "delay-ms" + // before recovering this failure, in order to trigger election timeouts. // The expected behavior is that a new leader gets elected, and the // old leader comes back online and rejoins the cluster as a follower. // As always, after recovery, each member must be able to process // client requests. 
- FailureCase_SIGTERM_LEADER FailureCase = 2 + Case_SIGTERM_LEADER Case = 2 // SIGTERM_LEADER_UNTIL_TRIGGER_SNAPSHOT stops the active leader node // but does not delete its data directories on disk for next restart. // And waits until most up-to-date node ("new" leader) applies the @@ -142,32 +159,31 @@ const ( // And it receives the snapshot from the new leader to overwrite its // store. As always, after recovery, each member must be able to // process client requests. - FailureCase_SIGTERM_LEADER_UNTIL_TRIGGER_SNAPSHOT FailureCase = 3 + Case_SIGTERM_LEADER_UNTIL_TRIGGER_SNAPSHOT Case = 3 // SIGTERM_QUORUM stops majority number of nodes to make the whole cluster // inoperable but does not delete data directories on stopped nodes - // for next restart. And it waits "failure-delay-ms" before recovering - // this failure. + // for next restart. And it waits "delay-ms" before recovering failure. // The expected behavior is that nodes come back online, thus cluster // comes back operative as well. As always, after recovery, each member // must be able to process client requests. - FailureCase_SIGTERM_QUORUM FailureCase = 4 + Case_SIGTERM_QUORUM Case = 4 // SIGTERM_ALL stops the whole cluster but does not delete data directories - // on disk for next restart. And it waits "failure-delay-ms" before - // recovering this failure. + // on disk for next restart. And it waits "delay-ms" before recovering + // this failure. // The expected behavior is that nodes come back online, thus cluster // comes back operative as well. As always, after recovery, each member // must be able to process client requests. - FailureCase_SIGTERM_ALL FailureCase = 5 + Case_SIGTERM_ALL Case = 5 // SIGQUIT_AND_REMOVE_ONE_FOLLOWER stops a randomly chosen follower // (non-leader), deletes its data directories on disk, and removes // this member from cluster (membership reconfiguration). On recovery, // tester adds a new member, and this member joins the existing cluster - // with fresh data. It waits "failure-delay-ms" before recovering this + // with fresh data. It waits "delay-ms" before recovering this // failure. This simulates destroying one follower machine, where operator // needs to add a new member from a fresh machine. // The expected behavior is that a new member joins the existing cluster, // and then each member continues to process client requests. - FailureCase_SIGQUIT_AND_REMOVE_ONE_FOLLOWER FailureCase = 10 + Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER Case = 10 // SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT stops a randomly // chosen follower, deletes its data directories on disk, and removes // this member from cluster (membership reconfiguration). On recovery, @@ -179,16 +195,16 @@ const ( // The expected behavior is that a new member joins the existing cluster, // and receives a snapshot from the active leader. As always, after // recovery, each member must be able to process client requests. - FailureCase_SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT FailureCase = 11 + Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT Case = 11 // SIGQUIT_AND_REMOVE_LEADER stops the active leader node, deletes its // data directories on disk, and removes this member from cluster. // On recovery, tester adds a new member, and this member joins the - // existing cluster with fresh data. It waits "failure-delay-ms" before + // existing cluster with fresh data. It waits "delay-ms" before // recovering this failure. 
This simulates destroying a leader machine, // where operator needs to add a new member from a fresh machine. // The expected behavior is that a new member joins the existing cluster, // and then each member continues to process client requests. - FailureCase_SIGQUIT_AND_REMOVE_LEADER FailureCase = 12 + Case_SIGQUIT_AND_REMOVE_LEADER Case = 12 // SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT stops the active leader, // deletes its data directories on disk, and removes this member from // cluster (membership reconfiguration). On recovery, tester adds a new @@ -201,13 +217,39 @@ const ( // leader, and a new member joins the existing cluster and receives a // snapshot from the newly elected leader. As always, after recovery, each // member must be able to process client requests. - FailureCase_SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT FailureCase = 13 + Case_SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT Case = 13 + // SIGQUIT_AND_REMOVE_QUORUM_AND_RESTORE_LEADER_SNAPSHOT_FROM_SCRATCH first + // stops majority number of nodes, deletes data directories on those quorum + // nodes, to make the whole cluster inoperable. Now that quorum and their + // data are totally destroyed, cluster cannot even remove unavailable nodes + // (e.g. 2 out of 3 are lost, so no leader can be elected). + // Let's assume a 3-node cluster of nodes A, B, and C. One day, nodes A and B + // are destroyed and all their data are gone. The only viable solution is + // to recover from C's latest snapshot. + // + // To simulate: + // 1. Assume node C is the current leader with most up-to-date data. + // 2. Download snapshot from node C, before destroying node A and B. + // 3. Destroy node A and B, and make the whole cluster inoperable. + // 4. Now node C cannot operate either. + // 5. SIGTERM node C and remove its data directories. + // 6. Restore a new seed member from node C's latest snapshot file. + // 7. Add another member to establish 2-node cluster. + // 8. Add another member to establish 3-node cluster. + // 9. Add more if any. + // + // The expected behavior is that etcd successfully recovers from such + // a disastrous situation where only 1 node survives out of a 3-node cluster, + // new members join the existing cluster, and previous data from the snapshot + // are still preserved after the recovery process. As always, after recovery, + // each member must be able to process client requests. + Case_SIGQUIT_AND_REMOVE_QUORUM_AND_RESTORE_LEADER_SNAPSHOT_FROM_SCRATCH Case = 14 // BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER drops all outgoing/incoming // packets from/to the peer port on a randomly chosen follower - // (non-leader), and waits for "failure-delay-ms" until recovery. + // (non-leader), and waits for "delay-ms" until recovery. // The expected behavior is that once dropping operation is undone, // each member must be able to process client requests. - FailureCase_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER FailureCase = 100 + Case_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER Case = 100 // BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT drops // all outgoing/incoming packets from/to the peer port on a randomly // chosen follower (non-leader), and waits for most up-to-date node @@ -217,15 +259,15 @@ const ( // the slow follower tries to catch up, possibly receiving the snapshot // from the active leader. As always, after recovery, each member must // be able to process client requests. 
- FailureCase_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT FailureCase = 101 + Case_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT Case = 101 // BLACKHOLE_PEER_PORT_TX_RX_LEADER drops all outgoing/incoming packets // from/to the peer port on the active leader (isolated), and waits for - // "failure-delay-ms" until recovery, in order to trigger election timeout. + // "delay-ms" until recovery, in order to trigger election timeout. // The expected behavior is that after election timeout, a new leader gets // elected, and once dropping operation is undone, the old leader comes // back and rejoins the cluster as a follower. As always, after recovery, // each member must be able to process client requests. - FailureCase_BLACKHOLE_PEER_PORT_TX_RX_LEADER FailureCase = 102 + Case_BLACKHOLE_PEER_PORT_TX_RX_LEADER Case = 102 // BLACKHOLE_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT drops all // outgoing/incoming packets from/to the peer port on the active leader, // and waits for most up-to-date node (leader) applies the snapshot @@ -235,37 +277,37 @@ const ( // the cluster as a follower. The slow follower tries to catch up, likely // receiving the snapshot from the new active leader. As always, after // recovery, each member must be able to process client requests. - FailureCase_BLACKHOLE_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT FailureCase = 103 + Case_BLACKHOLE_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT Case = 103 // BLACKHOLE_PEER_PORT_TX_RX_QUORUM drops all outgoing/incoming packets // from/to the peer ports on majority nodes of cluster, thus losing its - // leader and cluster being inoperable. And it waits for "failure-delay-ms" + // leader and cluster being inoperable. And it waits for "delay-ms" // until recovery. // The expected behavior is that once packet drop operation is undone, // nodes come back online, thus cluster comes back operative. As always, // after recovery, each member must be able to process client requests. - FailureCase_BLACKHOLE_PEER_PORT_TX_RX_QUORUM FailureCase = 104 + Case_BLACKHOLE_PEER_PORT_TX_RX_QUORUM Case = 104 // BLACKHOLE_PEER_PORT_TX_RX_ALL drops all outgoing/incoming packets // from/to the peer ports on all nodes, thus making cluster totally - // inoperable. It waits for "failure-delay-ms" until recovery. + // inoperable. It waits for "delay-ms" until recovery. // The expected behavior is that once packet drop operation is undone, // nodes come back online, thus cluster comes back operative. As always, // after recovery, each member must be able to process client requests. - FailureCase_BLACKHOLE_PEER_PORT_TX_RX_ALL FailureCase = 105 + Case_BLACKHOLE_PEER_PORT_TX_RX_ALL Case = 105 // DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER delays outgoing/incoming packets // from/to the peer port on a randomly chosen follower (non-leader). - // It waits for "failure-delay-ms" until recovery. + // It waits for "delay-ms" until recovery. // The expected behavior is that once packet delay operation is undone, // the follower comes back and tries to catch up with latest changes from // cluster. And as always, after recovery, each member must be able to // process client requests. - FailureCase_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER FailureCase = 200 + Case_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER Case = 200 // RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER delays outgoing/incoming // packets from/to the peer port on a randomly chosen follower - // (non-leader) with a randomized time duration (thus isolated). 
It waits - // for "failure-delay-ms" until recovery. + // (non-leader) with a randomized time duration (thus isolated). It + // waits for "delay-ms" until recovery. // The expected behavior is that once packet delay operation is undone, // each member must be able to process client requests. - FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER FailureCase = 201 + Case_RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER Case = 201 // DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT delays // outgoing/incoming packets from/to the peer port on a randomly chosen // follower (non-leader), and waits for most up-to-date node (leader) @@ -275,7 +317,7 @@ const ( // the slow follower comes back and catches up possibly receiving snapshot // from the active leader. As always, after recovery, each member must be // able to process client requests. - FailureCase_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT FailureCase = 202 + Case_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT Case = 202 // RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT delays // outgoing/incoming packets from/to the peer port on a randomly chosen // follower (non-leader) with a randomized time duration, and waits for @@ -286,23 +328,23 @@ const ( // the slow follower comes back and catches up, possibly receiving a // snapshot from the active leader. As always, after recovery, each member // must be able to process client requests. - FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT FailureCase = 203 + Case_RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT Case = 203 // DELAY_PEER_PORT_TX_RX_LEADER delays outgoing/incoming packets from/to - // the peer port on the active leader. And waits for "failure-delay-ms" - // until recovery. + // the peer port on the active leader. And waits for "delay-ms" until + // recovery. // The expected behavior is that cluster may elect a new leader, and // once packet delay operation is undone, the (old) leader comes back // and tries to catch up with latest changes from cluster. As always, // after recovery, each member must be able to process client requests. - FailureCase_DELAY_PEER_PORT_TX_RX_LEADER FailureCase = 204 + Case_DELAY_PEER_PORT_TX_RX_LEADER Case = 204 // RANDOM_DELAY_PEER_PORT_TX_RX_LEADER delays outgoing/incoming packets // from/to the peer port on the active leader with a randomized time - // duration. And waits for "failure-delay-ms" until recovery. + // duration. And waits for "delay-ms" until recovery. // The expected behavior is that cluster may elect a new leader, and // once packet delay operation is undone, the (old) leader comes back // and tries to catch up with latest changes from cluster. As always, // after recovery, each member must be able to process client requests. - FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_LEADER FailureCase = 205 + Case_RANDOM_DELAY_PEER_PORT_TX_RX_LEADER Case = 205 // DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT delays // outgoing/incoming packets from/to the peer port on the active leader, // and waits for most up-to-date node (current or new leader) applies the @@ -313,7 +355,7 @@ const ( // and catches up, likely receiving a snapshot from the active leader. // As always, after recovery, each member must be able to process client // requests. 
- FailureCase_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT FailureCase = 206 + Case_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT Case = 206 // RANDOM_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT delays // outgoing/incoming packets from/to the peer port on the active leader, // with a randomized time duration. And it waits for most up-to-date node @@ -325,59 +367,71 @@ const ( // and catches up, likely receiving a snapshot from the active leader. // As always, after recovery, each member must be able to process client // requests. - FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT FailureCase = 207 + Case_RANDOM_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT Case = 207 // DELAY_PEER_PORT_TX_RX_QUORUM delays outgoing/incoming packets from/to // the peer ports on majority nodes of cluster. And it waits for - // "failure-delay-ms" until recovery, likely to trigger election timeouts. + // "delay-ms" until recovery, likely to trigger election timeouts. // The expected behavior is that cluster may elect a new leader, while // quorum of nodes struggle with slow networks, and once delay operation // is undone, nodes come back and cluster comes back operative. As always, // after recovery, each member must be able to process client requests. - FailureCase_DELAY_PEER_PORT_TX_RX_QUORUM FailureCase = 208 + Case_DELAY_PEER_PORT_TX_RX_QUORUM Case = 208 // RANDOM_DELAY_PEER_PORT_TX_RX_QUORUM delays outgoing/incoming packets // from/to the peer ports on majority nodes of cluster, with randomized - // time durations. And it waits for "failure-delay-ms" until recovery, - // likely to trigger election timeouts. + // time durations. And it waits for "delay-ms" until recovery, likely + // to trigger election timeouts. // The expected behavior is that cluster may elect a new leader, while // quorum of nodes struggle with slow networks, and once delay operation // is undone, nodes come back and cluster comes back operative. As always, // after recovery, each member must be able to process client requests. - FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_QUORUM FailureCase = 209 + Case_RANDOM_DELAY_PEER_PORT_TX_RX_QUORUM Case = 209 // DELAY_PEER_PORT_TX_RX_ALL delays outgoing/incoming packets from/to the - // peer ports on all nodes. And it waits for "failure-delay-ms" until - // recovery, likely to trigger election timeouts. + // peer ports on all nodes. And it waits for "delay-ms" until recovery, + // likely to trigger election timeouts. // The expected behavior is that cluster may become totally inoperable, // struggling with slow networks across the whole cluster. Once delay // operation is undone, nodes come back and cluster comes back operative. // As always, after recovery, each member must be able to process client // requests. - FailureCase_DELAY_PEER_PORT_TX_RX_ALL FailureCase = 210 + Case_DELAY_PEER_PORT_TX_RX_ALL Case = 210 // RANDOM_DELAY_PEER_PORT_TX_RX_ALL delays outgoing/incoming packets // from/to the peer ports on all nodes, with randomized time durations. - // And it waits for "failure-delay-ms" until recovery, likely to trigger + // And it waits for "delay-ms" until recovery, likely to trigger // election timeouts. // The expected behavior is that cluster may become totally inoperable, // struggling with slow networks across the whole cluster. Once delay // operation is undone, nodes come back and cluster comes back operative. // As always, after recovery, each member must be able to process client // requests. 
- FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_ALL FailureCase = 211 - // NO_FAIL_WITH_STRESS runs no-op failure injection that does not do - // anything against cluster for "failure-delay-ms" duration, while - // stressers are still sending requests. - FailureCase_NO_FAIL_WITH_STRESS FailureCase = 300 - // NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS runs no-op failure injection - // that does not do anything against cluster for "failure-delay-ms" - // duration, while all stressers are stopped. - FailureCase_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS FailureCase = 301 + Case_RANDOM_DELAY_PEER_PORT_TX_RX_ALL Case = 211 + // NO_FAIL_WITH_STRESS stops injecting failures while testing the + // consistency and correctness under pressure loads, for the duration of + // "delay-ms". The goal is to ensure the cluster is still making progress + // on recovery, and to verify the system does not deadlock following a sequence + // of failure injections. + // The expected behavior is that the cluster remains fully operative in a healthy + // condition. As always, after recovery, each member must be able to process + // client requests. + Case_NO_FAIL_WITH_STRESS Case = 300 + // NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS neither injects failures nor + // sends stressing client requests to the cluster, for the duration of + // "delay-ms". The goal is to ensure the cluster is still making progress + // on recovery, and to verify the system does not deadlock following a sequence + // of failure injections. + // The expected behavior is that the cluster remains fully operative in a healthy + // condition, and client requests during the liveness period succeed without + // errors. + // Note: this is how Google Chubby does failure injection testing + // https://static.googleusercontent.com/media/research.google.com/en//archive/paxos_made_live.pdf. + Case_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS Case = 301 // FAILPOINTS injects failpoints to etcd server runtime, triggering panics // in critical code paths. - FailureCase_FAILPOINTS FailureCase = 400 + Case_FAILPOINTS Case = 400 // EXTERNAL runs external failure injection scripts. 
- FailureCase_EXTERNAL FailureCase = 500 + Case_EXTERNAL Case = 500 ) -var FailureCase_name = map[int32]string{ +var Case_name = map[int32]string{ 0: "SIGTERM_ONE_FOLLOWER", 1: "SIGTERM_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT", 2: "SIGTERM_LEADER", @@ -388,6 +442,7 @@ var FailureCase_name = map[int32]string{ 11: "SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT", 12: "SIGQUIT_AND_REMOVE_LEADER", 13: "SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT", + 14: "SIGQUIT_AND_REMOVE_QUORUM_AND_RESTORE_LEADER_SNAPSHOT_FROM_SCRATCH", 100: "BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER", 101: "BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT", 102: "BLACKHOLE_PEER_PORT_TX_RX_LEADER", @@ -411,58 +466,59 @@ var FailureCase_name = map[int32]string{ 400: "FAILPOINTS", 500: "EXTERNAL", } -var FailureCase_value = map[string]int32{ - "SIGTERM_ONE_FOLLOWER": 0, - "SIGTERM_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT": 1, - "SIGTERM_LEADER": 2, - "SIGTERM_LEADER_UNTIL_TRIGGER_SNAPSHOT": 3, - "SIGTERM_QUORUM": 4, - "SIGTERM_ALL": 5, - "SIGQUIT_AND_REMOVE_ONE_FOLLOWER": 10, - "SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT": 11, - "SIGQUIT_AND_REMOVE_LEADER": 12, - "SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT": 13, - "BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER": 100, - "BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT": 101, - "BLACKHOLE_PEER_PORT_TX_RX_LEADER": 102, - "BLACKHOLE_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT": 103, - "BLACKHOLE_PEER_PORT_TX_RX_QUORUM": 104, - "BLACKHOLE_PEER_PORT_TX_RX_ALL": 105, - "DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER": 200, - "RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER": 201, - "DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT": 202, - "RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT": 203, - "DELAY_PEER_PORT_TX_RX_LEADER": 204, - "RANDOM_DELAY_PEER_PORT_TX_RX_LEADER": 205, - "DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT": 206, - "RANDOM_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT": 207, - "DELAY_PEER_PORT_TX_RX_QUORUM": 208, - "RANDOM_DELAY_PEER_PORT_TX_RX_QUORUM": 209, - "DELAY_PEER_PORT_TX_RX_ALL": 210, - "RANDOM_DELAY_PEER_PORT_TX_RX_ALL": 211, - "NO_FAIL_WITH_STRESS": 300, - "NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS": 301, - "FAILPOINTS": 400, - "EXTERNAL": 500, +var Case_value = map[string]int32{ + "SIGTERM_ONE_FOLLOWER": 0, + "SIGTERM_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT": 1, + "SIGTERM_LEADER": 2, + "SIGTERM_LEADER_UNTIL_TRIGGER_SNAPSHOT": 3, + "SIGTERM_QUORUM": 4, + "SIGTERM_ALL": 5, + "SIGQUIT_AND_REMOVE_ONE_FOLLOWER": 10, + "SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT": 11, + "SIGQUIT_AND_REMOVE_LEADER": 12, + "SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT": 13, + "SIGQUIT_AND_REMOVE_QUORUM_AND_RESTORE_LEADER_SNAPSHOT_FROM_SCRATCH": 14, + "BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER": 100, + "BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT": 101, + "BLACKHOLE_PEER_PORT_TX_RX_LEADER": 102, + "BLACKHOLE_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT": 103, + "BLACKHOLE_PEER_PORT_TX_RX_QUORUM": 104, + "BLACKHOLE_PEER_PORT_TX_RX_ALL": 105, + "DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER": 200, + "RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER": 201, + "DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT": 202, + "RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT": 203, + "DELAY_PEER_PORT_TX_RX_LEADER": 204, + "RANDOM_DELAY_PEER_PORT_TX_RX_LEADER": 205, + "DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT": 206, + "RANDOM_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT": 207, + 
"DELAY_PEER_PORT_TX_RX_QUORUM": 208, + "RANDOM_DELAY_PEER_PORT_TX_RX_QUORUM": 209, + "DELAY_PEER_PORT_TX_RX_ALL": 210, + "RANDOM_DELAY_PEER_PORT_TX_RX_ALL": 211, + "NO_FAIL_WITH_STRESS": 300, + "NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS": 301, + "FAILPOINTS": 400, + "EXTERNAL": 500, } -func (x FailureCase) String() string { - return proto.EnumName(FailureCase_name, int32(x)) +func (x Case) String() string { + return proto.EnumName(Case_name, int32(x)) } -func (FailureCase) EnumDescriptor() ([]byte, []int) { return fileDescriptorRpc, []int{1} } +func (Case) EnumDescriptor() ([]byte, []int) { return fileDescriptorRpc, []int{1} } -type StressType int32 +type Stresser int32 const ( - StressType_KV StressType = 0 - StressType_LEASE StressType = 1 - StressType_ELECTION_RUNNER StressType = 2 - StressType_WATCH_RUNNER StressType = 3 - StressType_LOCK_RACER_RUNNER StressType = 4 - StressType_LEASE_RUNNER StressType = 5 + Stresser_KV Stresser = 0 + Stresser_LEASE Stresser = 1 + Stresser_ELECTION_RUNNER Stresser = 2 + Stresser_WATCH_RUNNER Stresser = 3 + Stresser_LOCK_RACER_RUNNER Stresser = 4 + Stresser_LEASE_RUNNER Stresser = 5 ) -var StressType_name = map[int32]string{ +var Stresser_name = map[int32]string{ 0: "KV", 1: "LEASE", 2: "ELECTION_RUNNER", @@ -470,7 +526,7 @@ var StressType_name = map[int32]string{ 4: "LOCK_RACER_RUNNER", 5: "LEASE_RUNNER", } -var StressType_value = map[string]int32{ +var Stresser_value = map[string]int32{ "KV": 0, "LEASE": 1, "ELECTION_RUNNER": 2, @@ -479,10 +535,37 @@ var StressType_value = map[string]int32{ "LEASE_RUNNER": 5, } -func (x StressType) String() string { - return proto.EnumName(StressType_name, int32(x)) +func (x Stresser) String() string { + return proto.EnumName(Stresser_name, int32(x)) +} +func (Stresser) EnumDescriptor() ([]byte, []int) { return fileDescriptorRpc, []int{2} } + +type Checker int32 + +const ( + Checker_KV_HASH Checker = 0 + Checker_LEASE_EXPIRE Checker = 1 + Checker_RUNNER Checker = 2 + Checker_NO_CHECK Checker = 3 +) + +var Checker_name = map[int32]string{ + 0: "KV_HASH", + 1: "LEASE_EXPIRE", + 2: "RUNNER", + 3: "NO_CHECK", +} +var Checker_value = map[string]int32{ + "KV_HASH": 0, + "LEASE_EXPIRE": 1, + "RUNNER": 2, + "NO_CHECK": 3, +} + +func (x Checker) String() string { + return proto.EnumName(Checker_name, int32(x)) } -func (StressType) EnumDescriptor() ([]byte, []int) { return fileDescriptorRpc, []int{2} } +func (Checker) EnumDescriptor() ([]byte, []int) { return fileDescriptorRpc, []int{3} } type Request struct { Operation Operation `protobuf:"varint,1,opt,name=Operation,proto3,enum=rpcpb.Operation" json:"Operation,omitempty"` @@ -497,17 +580,37 @@ func (m *Request) String() string { return proto.CompactTextString(m) func (*Request) ProtoMessage() {} func (*Request) Descriptor() ([]byte, []int) { return fileDescriptorRpc, []int{0} } +// SnapshotInfo contains SAVE_SNAPSHOT request results. 
+type SnapshotInfo struct { + MemberName string `protobuf:"bytes,1,opt,name=MemberName,proto3" json:"MemberName,omitempty"` + MemberClientURLs []string `protobuf:"bytes,2,rep,name=MemberClientURLs" json:"MemberClientURLs,omitempty"` + SnapshotPath string `protobuf:"bytes,3,opt,name=SnapshotPath,proto3" json:"SnapshotPath,omitempty"` + SnapshotFileSize string `protobuf:"bytes,4,opt,name=SnapshotFileSize,proto3" json:"SnapshotFileSize,omitempty"` + SnapshotTotalSize string `protobuf:"bytes,5,opt,name=SnapshotTotalSize,proto3" json:"SnapshotTotalSize,omitempty"` + SnapshotTotalKey int64 `protobuf:"varint,6,opt,name=SnapshotTotalKey,proto3" json:"SnapshotTotalKey,omitempty"` + SnapshotHash int64 `protobuf:"varint,7,opt,name=SnapshotHash,proto3" json:"SnapshotHash,omitempty"` + SnapshotRevision int64 `protobuf:"varint,8,opt,name=SnapshotRevision,proto3" json:"SnapshotRevision,omitempty"` + Took string `protobuf:"bytes,9,opt,name=Took,proto3" json:"Took,omitempty"` +} + +func (m *SnapshotInfo) Reset() { *m = SnapshotInfo{} } +func (m *SnapshotInfo) String() string { return proto.CompactTextString(m) } +func (*SnapshotInfo) ProtoMessage() {} +func (*SnapshotInfo) Descriptor() ([]byte, []int) { return fileDescriptorRpc, []int{1} } + type Response struct { Success bool `protobuf:"varint,1,opt,name=Success,proto3" json:"Success,omitempty"` Status string `protobuf:"bytes,2,opt,name=Status,proto3" json:"Status,omitempty"` // Member contains the same Member object from tester request. Member *Member `protobuf:"bytes,3,opt,name=Member" json:"Member,omitempty"` + // SnapshotInfo contains SAVE_SNAPSHOT request results. + SnapshotInfo *SnapshotInfo `protobuf:"bytes,4,opt,name=SnapshotInfo" json:"SnapshotInfo,omitempty"` } func (m *Response) Reset() { *m = Response{} } func (m *Response) String() string { return proto.CompactTextString(m) } func (*Response) ProtoMessage() {} -func (*Response) Descriptor() ([]byte, []int) { return fileDescriptorRpc, []int{1} } +func (*Response) Descriptor() ([]byte, []int) { return fileDescriptorRpc, []int{2} } type Member struct { // EtcdExecPath is the executable etcd binary path in agent server. @@ -530,6 +633,9 @@ type Member struct { EtcdClientEndpoint string `protobuf:"bytes,301,opt,name=EtcdClientEndpoint,proto3" json:"EtcdClientEndpoint,omitempty" yaml:"etcd-client-endpoint"` // Etcd defines etcd binary configuration flags. Etcd *Etcd `protobuf:"bytes,302,opt,name=Etcd" json:"Etcd,omitempty" yaml:"etcd"` + // EtcdOnSnapshotRestore defines one-time use configuration during etcd + // snapshot recovery process. + EtcdOnSnapshotRestore *Etcd `protobuf:"bytes,303,opt,name=EtcdOnSnapshotRestore" json:"EtcdOnSnapshotRestore,omitempty"` // ClientCertData contains cert file contents from this member's etcd server. ClientCertData string `protobuf:"bytes,401,opt,name=ClientCertData,proto3" json:"ClientCertData,omitempty" yaml:"client-cert-data"` ClientCertPath string `protobuf:"bytes,402,opt,name=ClientCertPath,proto3" json:"ClientCertPath,omitempty" yaml:"client-cert-path"` @@ -548,12 +654,16 @@ type Member struct { // PeerTrustedCAData contains trusted CA file contents from this member's etcd server. PeerTrustedCAData string `protobuf:"bytes,505,opt,name=PeerTrustedCAData,proto3" json:"PeerTrustedCAData,omitempty" yaml:"peer-trusted-ca-data"` PeerTrustedCAPath string `protobuf:"bytes,506,opt,name=PeerTrustedCAPath,proto3" json:"PeerTrustedCAPath,omitempty" yaml:"peer-trusted-ca-path"` + // SnapshotPath is the snapshot file path to store or restore from. 
+ SnapshotPath string `protobuf:"bytes,601,opt,name=SnapshotPath,proto3" json:"SnapshotPath,omitempty" yaml:"snapshot-path"` + // SnapshotInfo contains last SAVE_SNAPSHOT request results. + SnapshotInfo *SnapshotInfo `protobuf:"bytes,602,opt,name=SnapshotInfo" json:"SnapshotInfo,omitempty"` } func (m *Member) Reset() { *m = Member{} } func (m *Member) String() string { return proto.CompactTextString(m) } func (*Member) ProtoMessage() {} -func (*Member) Descriptor() ([]byte, []int) { return fileDescriptorRpc, []int{2} } +func (*Member) Descriptor() ([]byte, []int) { return fileDescriptorRpc, []int{3} } type Tester struct { DataDir string `protobuf:"bytes,1,opt,name=DataDir,proto3" json:"DataDir,omitempty" yaml:"data-dir"` @@ -570,52 +680,55 @@ type Tester struct { UpdatedDelayLatencyMs uint32 `protobuf:"varint,13,opt,name=UpdatedDelayLatencyMs,proto3" json:"UpdatedDelayLatencyMs,omitempty" yaml:"updated-delay-latency-ms"` // RoundLimit is the limit of rounds to run failure set (-1 to run without limits). RoundLimit int32 `protobuf:"varint,21,opt,name=RoundLimit,proto3" json:"RoundLimit,omitempty" yaml:"round-limit"` - // ExitOnFailure is true, then exit tester on first failure. - ExitOnFailure bool `protobuf:"varint,22,opt,name=ExitOnFailure,proto3" json:"ExitOnFailure,omitempty" yaml:"exit-on-failure"` - // ConsistencyCheck is true to check consistency (revision, hash). - ConsistencyCheck bool `protobuf:"varint,23,opt,name=ConsistencyCheck,proto3" json:"ConsistencyCheck,omitempty" yaml:"consistency-check"` + // ExitOnCaseFail is true to exit tester on the first case failure. + ExitOnCaseFail bool `protobuf:"varint,22,opt,name=ExitOnCaseFail,proto3" json:"ExitOnCaseFail,omitempty" yaml:"exit-on-failure"` // EnablePprof is true to enable profiler. - EnablePprof bool `protobuf:"varint,24,opt,name=EnablePprof,proto3" json:"EnablePprof,omitempty" yaml:"enable-pprof"` - // FailureDelayMs is the delay duration after failure is injected. + EnablePprof bool `protobuf:"varint,23,opt,name=EnablePprof,proto3" json:"EnablePprof,omitempty" yaml:"enable-pprof"` + // CaseDelayMs is the delay duration after failure is injected. // Useful when triggering snapshot or no-op failure cases. - FailureDelayMs uint32 `protobuf:"varint,31,opt,name=FailureDelayMs,proto3" json:"FailureDelayMs,omitempty" yaml:"failure-delay-ms"` - // FailureShuffle is true to randomize failure injecting order. - FailureShuffle bool `protobuf:"varint,32,opt,name=FailureShuffle,proto3" json:"FailureShuffle,omitempty" yaml:"failure-shuffle"` - // FailureCases is the selected test cases to schedule. + CaseDelayMs uint32 `protobuf:"varint,31,opt,name=CaseDelayMs,proto3" json:"CaseDelayMs,omitempty" yaml:"case-delay-ms"` + // CaseShuffle is true to randomize failure injecting order. + CaseShuffle bool `protobuf:"varint,32,opt,name=CaseShuffle,proto3" json:"CaseShuffle,omitempty" yaml:"case-shuffle"` + // Cases is the selected test cases to schedule. // If empty, run all failure cases. - FailureCases []string `protobuf:"bytes,33,rep,name=FailureCases" json:"FailureCases,omitempty" yaml:"failure-cases"` - // Failpoinommands is the list of "gofail" commands (e.g. panic("etcd-tester"),1*sleep(1000) + Cases []string `protobuf:"bytes,33,rep,name=Cases" json:"Cases,omitempty" yaml:"cases"` + // FailpointCommands is the list of "gofail" commands + // (e.g. panic("etcd-tester"),1*sleep(1000)). 
FailpointCommands []string `protobuf:"bytes,34,rep,name=FailpointCommands" json:"FailpointCommands,omitempty" yaml:"failpoint-commands"` // RunnerExecPath is a path of etcd-runner binary. RunnerExecPath string `protobuf:"bytes,41,opt,name=RunnerExecPath,proto3" json:"RunnerExecPath,omitempty" yaml:"runner-exec-path"` // ExternalExecPath is a path of script for enabling/disabling an external fault injector. ExternalExecPath string `protobuf:"bytes,42,opt,name=ExternalExecPath,proto3" json:"ExternalExecPath,omitempty" yaml:"external-exec-path"` - // StressTypes is the list of stresser names: - // keys, lease, nop, election-runner, watch-runner, lock-racer-runner, lease-runner. - StressTypes []string `protobuf:"bytes,101,rep,name=StressTypes" json:"StressTypes,omitempty" yaml:"stress-types"` + // Stressers is the list of stresser types: + // KV, LEASE, ELECTION_RUNNER, WATCH_RUNNER, LOCK_RACER_RUNNER, LEASE_RUNNER. + Stressers []string `protobuf:"bytes,101,rep,name=Stressers" json:"Stressers,omitempty" yaml:"stressers"` + // Checkers is the list of consistency checker types: + // KV_HASH, LEASE_EXPIRE, NO_CHECK, RUNNER. + // Leave empty to skip consistency checks. + Checkers []string `protobuf:"bytes,102,rep,name=Checkers" json:"Checkers,omitempty" yaml:"checkers"` // StressKeySize is the size of each small key written into etcd. - StressKeySize int32 `protobuf:"varint,102,opt,name=StressKeySize,proto3" json:"StressKeySize,omitempty" yaml:"stress-key-size"` + StressKeySize int32 `protobuf:"varint,201,opt,name=StressKeySize,proto3" json:"StressKeySize,omitempty" yaml:"stress-key-size"` // StressKeySizeLarge is the size of each large key written into etcd. - StressKeySizeLarge int32 `protobuf:"varint,103,opt,name=StressKeySizeLarge,proto3" json:"StressKeySizeLarge,omitempty" yaml:"stress-key-size-large"` + StressKeySizeLarge int32 `protobuf:"varint,202,opt,name=StressKeySizeLarge,proto3" json:"StressKeySizeLarge,omitempty" yaml:"stress-key-size-large"` // StressKeySuffixRange is the count of key range written into etcd. // Stress keys are created with "fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange)". - StressKeySuffixRange int32 `protobuf:"varint,104,opt,name=StressKeySuffixRange,proto3" json:"StressKeySuffixRange,omitempty" yaml:"stress-key-suffix-range"` + StressKeySuffixRange int32 `protobuf:"varint,203,opt,name=StressKeySuffixRange,proto3" json:"StressKeySuffixRange,omitempty" yaml:"stress-key-suffix-range"` // StressKeySuffixRangeTxn is the count of key range written into etcd txn (max 100). // Stress keys are created with "fmt.Sprintf("/k%03d", i)". - StressKeySuffixRangeTxn int32 `protobuf:"varint,105,opt,name=StressKeySuffixRangeTxn,proto3" json:"StressKeySuffixRangeTxn,omitempty" yaml:"stress-key-suffix-range-txn"` + StressKeySuffixRangeTxn int32 `protobuf:"varint,204,opt,name=StressKeySuffixRangeTxn,proto3" json:"StressKeySuffixRangeTxn,omitempty" yaml:"stress-key-suffix-range-txn"` // StressKeyTxnOps is the number of operations per a transaction (max 64). - StressKeyTxnOps int32 `protobuf:"varint,106,opt,name=StressKeyTxnOps,proto3" json:"StressKeyTxnOps,omitempty" yaml:"stress-key-txn-ops"` + StressKeyTxnOps int32 `protobuf:"varint,205,opt,name=StressKeyTxnOps,proto3" json:"StressKeyTxnOps,omitempty" yaml:"stress-key-txn-ops"` // StressClients is the number of concurrent stressing clients // with "one" shared TCP connection. 
- StressClients int32 `protobuf:"varint,201,opt,name=StressClients,proto3" json:"StressClients,omitempty" yaml:"stress-clients"` + StressClients int32 `protobuf:"varint,301,opt,name=StressClients,proto3" json:"StressClients,omitempty" yaml:"stress-clients"` // StressQPS is the maximum number of stresser requests per second. - StressQPS int32 `protobuf:"varint,202,opt,name=StressQPS,proto3" json:"StressQPS,omitempty" yaml:"stress-qps"` + StressQPS int32 `protobuf:"varint,302,opt,name=StressQPS,proto3" json:"StressQPS,omitempty" yaml:"stress-qps"` } func (m *Tester) Reset() { *m = Tester{} } func (m *Tester) String() string { return proto.CompactTextString(m) } func (*Tester) ProtoMessage() {} -func (*Tester) Descriptor() ([]byte, []int) { return fileDescriptorRpc, []int{3} } +func (*Tester) Descriptor() ([]byte, []int) { return fileDescriptorRpc, []int{4} } type Etcd struct { Name string `protobuf:"bytes,1,opt,name=Name,proto3" json:"Name,omitempty" yaml:"name"` @@ -653,17 +766,19 @@ type Etcd struct { func (m *Etcd) Reset() { *m = Etcd{} } func (m *Etcd) String() string { return proto.CompactTextString(m) } func (*Etcd) ProtoMessage() {} -func (*Etcd) Descriptor() ([]byte, []int) { return fileDescriptorRpc, []int{4} } +func (*Etcd) Descriptor() ([]byte, []int) { return fileDescriptorRpc, []int{5} } func init() { proto.RegisterType((*Request)(nil), "rpcpb.Request") + proto.RegisterType((*SnapshotInfo)(nil), "rpcpb.SnapshotInfo") proto.RegisterType((*Response)(nil), "rpcpb.Response") proto.RegisterType((*Member)(nil), "rpcpb.Member") proto.RegisterType((*Tester)(nil), "rpcpb.Tester") proto.RegisterType((*Etcd)(nil), "rpcpb.Etcd") proto.RegisterEnum("rpcpb.Operation", Operation_name, Operation_value) - proto.RegisterEnum("rpcpb.FailureCase", FailureCase_name, FailureCase_value) - proto.RegisterEnum("rpcpb.StressType", StressType_name, StressType_value) + proto.RegisterEnum("rpcpb.Case", Case_name, Case_value) + proto.RegisterEnum("rpcpb.Stresser", Stresser_name, Stresser_value) + proto.RegisterEnum("rpcpb.Checker", Checker_name, Checker_value) } // Reference imports to suppress errors if they are not otherwise used. 
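The remainder of this file's diff is mechanical codegen output: every added or renumbered field gets new hard-coded tag bytes in the regenerated MarshalTo, Size, and Unmarshal functions. A protobuf field key is varint((fieldNumber << 3) | wireType), so the 0xca, 0x25 pair written before Member.SnapshotPath below is just field 601 with wire type 2 (length-delimited). A minimal standalone sketch (not part of this change; encodeTag is a hypothetical helper that mirrors the generated encodeVarintRpc) reproduces those literals:

package main

import "fmt"

// encodeTag computes the protobuf wire-format key for a field:
// varint((fieldNumber << 3) | wireType), emitted low 7 bits first,
// with the continuation bit set on every byte except the last.
func encodeTag(fieldNumber, wireType uint64) []byte {
	key := fieldNumber<<3 | wireType
	var out []byte
	for key >= 0x80 {
		out = append(out, byte(key)|0x80) // low 7 bits plus continuation bit
		key >>= 7
	}
	return append(out, byte(key))
}

func main() {
	fmt.Printf("% x\n", encodeTag(601, 2)) // "ca 25" -> Member.SnapshotPath
	fmt.Printf("% x\n", encodeTag(602, 2)) // "d2 25" -> Member.SnapshotInfo
	fmt.Printf("% x\n", encodeTag(303, 2)) // "fa 12" -> Member.EtcdOnSnapshotRestore
}

Baked-in tags are also why renumbering the Tester stress fields (e.g. StressKeySize moving from tag 102 to 201) ripples through so many literals below, and why such renumbering breaks wire compatibility with binaries generated from the old proto; presumably that is acceptable here because the tester and agents are built from the same tree.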
@@ -813,6 +928,84 @@ func (m *Request) MarshalTo(dAtA []byte) (int, error) { return i, nil } +func (m *SnapshotInfo) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *SnapshotInfo) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.MemberName) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintRpc(dAtA, i, uint64(len(m.MemberName))) + i += copy(dAtA[i:], m.MemberName) + } + if len(m.MemberClientURLs) > 0 { + for _, s := range m.MemberClientURLs { + dAtA[i] = 0x12 + i++ + l = len(s) + for l >= 1<<7 { + dAtA[i] = uint8(uint64(l)&0x7f | 0x80) + l >>= 7 + i++ + } + dAtA[i] = uint8(l) + i++ + i += copy(dAtA[i:], s) + } + } + if len(m.SnapshotPath) > 0 { + dAtA[i] = 0x1a + i++ + i = encodeVarintRpc(dAtA, i, uint64(len(m.SnapshotPath))) + i += copy(dAtA[i:], m.SnapshotPath) + } + if len(m.SnapshotFileSize) > 0 { + dAtA[i] = 0x22 + i++ + i = encodeVarintRpc(dAtA, i, uint64(len(m.SnapshotFileSize))) + i += copy(dAtA[i:], m.SnapshotFileSize) + } + if len(m.SnapshotTotalSize) > 0 { + dAtA[i] = 0x2a + i++ + i = encodeVarintRpc(dAtA, i, uint64(len(m.SnapshotTotalSize))) + i += copy(dAtA[i:], m.SnapshotTotalSize) + } + if m.SnapshotTotalKey != 0 { + dAtA[i] = 0x30 + i++ + i = encodeVarintRpc(dAtA, i, uint64(m.SnapshotTotalKey)) + } + if m.SnapshotHash != 0 { + dAtA[i] = 0x38 + i++ + i = encodeVarintRpc(dAtA, i, uint64(m.SnapshotHash)) + } + if m.SnapshotRevision != 0 { + dAtA[i] = 0x40 + i++ + i = encodeVarintRpc(dAtA, i, uint64(m.SnapshotRevision)) + } + if len(m.Took) > 0 { + dAtA[i] = 0x4a + i++ + i = encodeVarintRpc(dAtA, i, uint64(len(m.Took))) + i += copy(dAtA[i:], m.Took) + } + return i, nil +} + func (m *Response) Marshal() (dAtA []byte, err error) { size := m.Size() dAtA = make([]byte, size) @@ -854,6 +1047,16 @@ func (m *Response) MarshalTo(dAtA []byte) (int, error) { } i += n3 } + if m.SnapshotInfo != nil { + dAtA[i] = 0x22 + i++ + i = encodeVarintRpc(dAtA, i, uint64(m.SnapshotInfo.Size())) + n4, err := m.SnapshotInfo.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n4 + } return i, nil } @@ -944,11 +1147,23 @@ func (m *Member) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0x12 i++ i = encodeVarintRpc(dAtA, i, uint64(m.Etcd.Size())) - n4, err := m.Etcd.MarshalTo(dAtA[i:]) + n5, err := m.Etcd.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n4 + i += n5 + } + if m.EtcdOnSnapshotRestore != nil { + dAtA[i] = 0xfa + i++ + dAtA[i] = 0x12 + i++ + i = encodeVarintRpc(dAtA, i, uint64(m.EtcdOnSnapshotRestore.Size())) + n6, err := m.EtcdOnSnapshotRestore.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n6 } if len(m.ClientCertData) > 0 { dAtA[i] = 0x8a @@ -1046,6 +1261,26 @@ func (m *Member) MarshalTo(dAtA []byte) (int, error) { i = encodeVarintRpc(dAtA, i, uint64(len(m.PeerTrustedCAPath))) i += copy(dAtA[i:], m.PeerTrustedCAPath) } + if len(m.SnapshotPath) > 0 { + dAtA[i] = 0xca + i++ + dAtA[i] = 0x25 + i++ + i = encodeVarintRpc(dAtA, i, uint64(len(m.SnapshotPath))) + i += copy(dAtA[i:], m.SnapshotPath) + } + if m.SnapshotInfo != nil { + dAtA[i] = 0xd2 + i++ + dAtA[i] = 0x25 + i++ + i = encodeVarintRpc(dAtA, i, uint64(m.SnapshotInfo.Size())) + n7, err := m.SnapshotInfo.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n7 + } return i, nil } @@ -1104,24 +1339,12 @@ func (m *Tester) MarshalTo(dAtA []byte) (int, error) { i++ i = encodeVarintRpc(dAtA, i, uint64(m.RoundLimit)) 
} - if m.ExitOnFailure { + if m.ExitOnCaseFail { dAtA[i] = 0xb0 i++ dAtA[i] = 0x1 i++ - if m.ExitOnFailure { - dAtA[i] = 1 - } else { - dAtA[i] = 0 - } - i++ - } - if m.ConsistencyCheck { - dAtA[i] = 0xb8 - i++ - dAtA[i] = 0x1 - i++ - if m.ConsistencyCheck { + if m.ExitOnCaseFail { dAtA[i] = 1 } else { dAtA[i] = 0 @@ -1129,7 +1352,7 @@ func (m *Tester) MarshalTo(dAtA []byte) (int, error) { i++ } if m.EnablePprof { - dAtA[i] = 0xc0 + dAtA[i] = 0xb8 i++ dAtA[i] = 0x1 i++ @@ -1140,27 +1363,27 @@ func (m *Tester) MarshalTo(dAtA []byte) (int, error) { } i++ } - if m.FailureDelayMs != 0 { + if m.CaseDelayMs != 0 { dAtA[i] = 0xf8 i++ dAtA[i] = 0x1 i++ - i = encodeVarintRpc(dAtA, i, uint64(m.FailureDelayMs)) + i = encodeVarintRpc(dAtA, i, uint64(m.CaseDelayMs)) } - if m.FailureShuffle { + if m.CaseShuffle { dAtA[i] = 0x80 i++ dAtA[i] = 0x2 i++ - if m.FailureShuffle { + if m.CaseShuffle { dAtA[i] = 1 } else { dAtA[i] = 0 } i++ } - if len(m.FailureCases) > 0 { - for _, s := range m.FailureCases { + if len(m.Cases) > 0 { + for _, s := range m.Cases { dAtA[i] = 0x8a i++ dAtA[i] = 0x2 @@ -1209,8 +1432,8 @@ func (m *Tester) MarshalTo(dAtA []byte) (int, error) { i = encodeVarintRpc(dAtA, i, uint64(len(m.ExternalExecPath))) i += copy(dAtA[i:], m.ExternalExecPath) } - if len(m.StressTypes) > 0 { - for _, s := range m.StressTypes { + if len(m.Stressers) > 0 { + for _, s := range m.Stressers { dAtA[i] = 0xaa i++ dAtA[i] = 0x6 @@ -1226,52 +1449,69 @@ func (m *Tester) MarshalTo(dAtA []byte) (int, error) { i += copy(dAtA[i:], s) } } + if len(m.Checkers) > 0 { + for _, s := range m.Checkers { + dAtA[i] = 0xb2 + i++ + dAtA[i] = 0x6 + i++ + l = len(s) + for l >= 1<<7 { + dAtA[i] = uint8(uint64(l)&0x7f | 0x80) + l >>= 7 + i++ + } + dAtA[i] = uint8(l) + i++ + i += copy(dAtA[i:], s) + } + } if m.StressKeySize != 0 { - dAtA[i] = 0xb0 + dAtA[i] = 0xc8 i++ - dAtA[i] = 0x6 + dAtA[i] = 0xc i++ i = encodeVarintRpc(dAtA, i, uint64(m.StressKeySize)) } if m.StressKeySizeLarge != 0 { - dAtA[i] = 0xb8 + dAtA[i] = 0xd0 i++ - dAtA[i] = 0x6 + dAtA[i] = 0xc i++ i = encodeVarintRpc(dAtA, i, uint64(m.StressKeySizeLarge)) } if m.StressKeySuffixRange != 0 { - dAtA[i] = 0xc0 + dAtA[i] = 0xd8 i++ - dAtA[i] = 0x6 + dAtA[i] = 0xc i++ i = encodeVarintRpc(dAtA, i, uint64(m.StressKeySuffixRange)) } if m.StressKeySuffixRangeTxn != 0 { - dAtA[i] = 0xc8 + dAtA[i] = 0xe0 i++ - dAtA[i] = 0x6 + dAtA[i] = 0xc i++ i = encodeVarintRpc(dAtA, i, uint64(m.StressKeySuffixRangeTxn)) } if m.StressKeyTxnOps != 0 { - dAtA[i] = 0xd0 + dAtA[i] = 0xe8 i++ - dAtA[i] = 0x6 + dAtA[i] = 0xc i++ i = encodeVarintRpc(dAtA, i, uint64(m.StressKeyTxnOps)) } if m.StressClients != 0 { - dAtA[i] = 0xc8 + dAtA[i] = 0xe8 i++ - dAtA[i] = 0xc + dAtA[i] = 0x12 i++ i = encodeVarintRpc(dAtA, i, uint64(m.StressClients)) } if m.StressQPS != 0 { - dAtA[i] = 0xd0 + dAtA[i] = 0xf0 i++ - dAtA[i] = 0xc + dAtA[i] = 0x12 i++ i = encodeVarintRpc(dAtA, i, uint64(m.StressQPS)) } @@ -1576,6 +1816,47 @@ func (m *Request) Size() (n int) { return n } +func (m *SnapshotInfo) Size() (n int) { + var l int + _ = l + l = len(m.MemberName) + if l > 0 { + n += 1 + l + sovRpc(uint64(l)) + } + if len(m.MemberClientURLs) > 0 { + for _, s := range m.MemberClientURLs { + l = len(s) + n += 1 + l + sovRpc(uint64(l)) + } + } + l = len(m.SnapshotPath) + if l > 0 { + n += 1 + l + sovRpc(uint64(l)) + } + l = len(m.SnapshotFileSize) + if l > 0 { + n += 1 + l + sovRpc(uint64(l)) + } + l = len(m.SnapshotTotalSize) + if l > 0 { + n += 1 + l + sovRpc(uint64(l)) + } + if m.SnapshotTotalKey != 0 { + n += 1 + 
sovRpc(uint64(m.SnapshotTotalKey)) + } + if m.SnapshotHash != 0 { + n += 1 + sovRpc(uint64(m.SnapshotHash)) + } + if m.SnapshotRevision != 0 { + n += 1 + sovRpc(uint64(m.SnapshotRevision)) + } + l = len(m.Took) + if l > 0 { + n += 1 + l + sovRpc(uint64(l)) + } + return n +} + func (m *Response) Size() (n int) { var l int _ = l @@ -1590,6 +1871,10 @@ func (m *Response) Size() (n int) { l = m.Member.Size() n += 1 + l + sovRpc(uint64(l)) } + if m.SnapshotInfo != nil { + l = m.SnapshotInfo.Size() + n += 1 + l + sovRpc(uint64(l)) + } return n } @@ -1630,6 +1915,10 @@ func (m *Member) Size() (n int) { l = m.Etcd.Size() n += 2 + l + sovRpc(uint64(l)) } + if m.EtcdOnSnapshotRestore != nil { + l = m.EtcdOnSnapshotRestore.Size() + n += 2 + l + sovRpc(uint64(l)) + } l = len(m.ClientCertData) if l > 0 { n += 2 + l + sovRpc(uint64(l)) @@ -1678,6 +1967,14 @@ func (m *Member) Size() (n int) { if l > 0 { n += 2 + l + sovRpc(uint64(l)) } + l = len(m.SnapshotPath) + if l > 0 { + n += 2 + l + sovRpc(uint64(l)) + } + if m.SnapshotInfo != nil { + l = m.SnapshotInfo.Size() + n += 2 + l + sovRpc(uint64(l)) + } return n } @@ -1708,23 +2005,20 @@ func (m *Tester) Size() (n int) { if m.RoundLimit != 0 { n += 2 + sovRpc(uint64(m.RoundLimit)) } - if m.ExitOnFailure { - n += 3 - } - if m.ConsistencyCheck { + if m.ExitOnCaseFail { n += 3 } if m.EnablePprof { n += 3 } - if m.FailureDelayMs != 0 { - n += 2 + sovRpc(uint64(m.FailureDelayMs)) + if m.CaseDelayMs != 0 { + n += 2 + sovRpc(uint64(m.CaseDelayMs)) } - if m.FailureShuffle { + if m.CaseShuffle { n += 3 } - if len(m.FailureCases) > 0 { - for _, s := range m.FailureCases { + if len(m.Cases) > 0 { + for _, s := range m.Cases { l = len(s) n += 2 + l + sovRpc(uint64(l)) } @@ -1743,8 +2037,14 @@ func (m *Tester) Size() (n int) { if l > 0 { n += 2 + l + sovRpc(uint64(l)) } - if len(m.StressTypes) > 0 { - for _, s := range m.StressTypes { + if len(m.Stressers) > 0 { + for _, s := range m.Stressers { + l = len(s) + n += 2 + l + sovRpc(uint64(l)) + } + } + if len(m.Checkers) > 0 { + for _, s := range m.Checkers { l = len(s) n += 2 + l + sovRpc(uint64(l)) } @@ -2029,7 +2329,7 @@ func (m *Request) Unmarshal(dAtA []byte) error { } return nil } -func (m *Response) Unmarshal(dAtA []byte) error { +func (m *SnapshotInfo) Unmarshal(dAtA []byte) error { l := len(dAtA) iNdEx := 0 for iNdEx < l { @@ -2052,17 +2352,17 @@ func (m *Response) Unmarshal(dAtA []byte) error { fieldNum := int32(wire >> 3) wireType := int(wire & 0x7) if wireType == 4 { - return fmt.Errorf("proto: Response: wiretype end group for non-group") + return fmt.Errorf("proto: SnapshotInfo: wiretype end group for non-group") } if fieldNum <= 0 { - return fmt.Errorf("proto: Response: illegal tag %d (wire type %d)", fieldNum, wire) + return fmt.Errorf("proto: SnapshotInfo: illegal tag %d (wire type %d)", fieldNum, wire) } switch fieldNum { case 1: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Success", wireType) + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field MemberName", wireType) } - var v int + var stringLen uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { return ErrIntOverflowRpc @@ -2072,15 +2372,24 @@ func (m *Response) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - v |= (int(b) & 0x7F) << shift + stringLen |= (uint64(b) & 0x7F) << shift if b < 0x80 { break } } - m.Success = bool(v != 0) + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + intStringLen + if postIndex > 
l { + return io.ErrUnexpectedEOF + } + m.MemberName = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex case 2: if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field Status", wireType) + return fmt.Errorf("proto: wrong wireType = %d for field MemberClientURLs", wireType) } var stringLen uint64 for shift := uint(0); ; shift += 7 { @@ -2105,13 +2414,13 @@ func (m *Response) Unmarshal(dAtA []byte) error { if postIndex > l { return io.ErrUnexpectedEOF } - m.Status = string(dAtA[iNdEx:postIndex]) + m.MemberClientURLs = append(m.MemberClientURLs, string(dAtA[iNdEx:postIndex])) iNdEx = postIndex case 3: if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field Member", wireType) + return fmt.Errorf("proto: wrong wireType = %d for field SnapshotPath", wireType) } - var msglen int + var stringLen uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { return ErrIntOverflowRpc @@ -2121,46 +2430,351 @@ func (m *Response) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - msglen |= (int(b) & 0x7F) << shift + stringLen |= (uint64(b) & 0x7F) << shift if b < 0x80 { break } } - if msglen < 0 { + intStringLen := int(stringLen) + if intStringLen < 0 { return ErrInvalidLengthRpc } - postIndex := iNdEx + msglen + postIndex := iNdEx + intStringLen if postIndex > l { return io.ErrUnexpectedEOF } - if m.Member == nil { - m.Member = &Member{} - } - if err := m.Member.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { - return err - } + m.SnapshotPath = string(dAtA[iNdEx:postIndex]) iNdEx = postIndex - default: - iNdEx = preIndex - skippy, err := skipRpc(dAtA[iNdEx:]) - if err != nil { - return err + case 4: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field SnapshotFileSize", wireType) } - if skippy < 0 { + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { return ErrInvalidLengthRpc } - if (iNdEx + skippy) > l { + postIndex := iNdEx + intStringLen + if postIndex > l { return io.ErrUnexpectedEOF } - iNdEx += skippy - } - } - - if iNdEx > l { - return io.ErrUnexpectedEOF - } - return nil -} + m.SnapshotFileSize = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 5: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field SnapshotTotalSize", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.SnapshotTotalSize = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 6: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field SnapshotTotalKey", wireType) + } + m.SnapshotTotalKey = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.SnapshotTotalKey |= (int64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 7: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for 
field SnapshotHash", wireType) + } + m.SnapshotHash = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.SnapshotHash |= (int64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 8: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field SnapshotRevision", wireType) + } + m.SnapshotRevision = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.SnapshotRevision |= (int64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 9: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Took", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Took = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipRpc(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthRpc + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Response) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Response: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Response: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Success", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.Success = bool(v != 0) + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Status", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Status = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Member", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return 
io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Member == nil { + m.Member = &Member{} + } + if err := m.Member.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 4: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field SnapshotInfo", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.SnapshotInfo == nil { + m.SnapshotInfo = &SnapshotInfo{} + } + if err := m.SnapshotInfo.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipRpc(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthRpc + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} func (m *Member) Unmarshal(dAtA []byte) error { l := len(dAtA) iNdEx := 0 @@ -2437,6 +3051,39 @@ func (m *Member) Unmarshal(dAtA []byte) error { return err } iNdEx = postIndex + case 303: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field EtcdOnSnapshotRestore", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.EtcdOnSnapshotRestore == nil { + m.EtcdOnSnapshotRestore = &Etcd{} + } + if err := m.EtcdOnSnapshotRestore.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex case 401: if wireType != 2 { return fmt.Errorf("proto: wrong wireType = %d for field ClientCertData", wireType) @@ -2785,6 +3432,68 @@ func (m *Member) Unmarshal(dAtA []byte) error { } m.PeerTrustedCAPath = string(dAtA[iNdEx:postIndex]) iNdEx = postIndex + case 601: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field SnapshotPath", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.SnapshotPath = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 602: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field SnapshotInfo", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + 
break + } + } + if msglen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.SnapshotInfo == nil { + m.SnapshotInfo = &SnapshotInfo{} + } + if err := m.SnapshotInfo.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipRpc(dAtA[iNdEx:]) @@ -3000,7 +3709,7 @@ func (m *Tester) Unmarshal(dAtA []byte) error { } case 22: if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field ExitOnFailure", wireType) + return fmt.Errorf("proto: wrong wireType = %d for field ExitOnCaseFail", wireType) } var v int for shift := uint(0); ; shift += 7 { @@ -3017,28 +3726,8 @@ func (m *Tester) Unmarshal(dAtA []byte) error { break } } - m.ExitOnFailure = bool(v != 0) + m.ExitOnCaseFail = bool(v != 0) case 23: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field ConsistencyCheck", wireType) - } - var v int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRpc - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - v |= (int(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - m.ConsistencyCheck = bool(v != 0) - case 24: if wireType != 0 { return fmt.Errorf("proto: wrong wireType = %d for field EnablePprof", wireType) } @@ -3060,9 +3749,9 @@ func (m *Tester) Unmarshal(dAtA []byte) error { m.EnablePprof = bool(v != 0) case 31: if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field FailureDelayMs", wireType) + return fmt.Errorf("proto: wrong wireType = %d for field CaseDelayMs", wireType) } - m.FailureDelayMs = 0 + m.CaseDelayMs = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { return ErrIntOverflowRpc @@ -3072,14 +3761,14 @@ func (m *Tester) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - m.FailureDelayMs |= (uint32(b) & 0x7F) << shift + m.CaseDelayMs |= (uint32(b) & 0x7F) << shift if b < 0x80 { break } } case 32: if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field FailureShuffle", wireType) + return fmt.Errorf("proto: wrong wireType = %d for field CaseShuffle", wireType) } var v int for shift := uint(0); ; shift += 7 { @@ -3096,10 +3785,10 @@ func (m *Tester) Unmarshal(dAtA []byte) error { break } } - m.FailureShuffle = bool(v != 0) + m.CaseShuffle = bool(v != 0) case 33: if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field FailureCases", wireType) + return fmt.Errorf("proto: wrong wireType = %d for field Cases", wireType) } var stringLen uint64 for shift := uint(0); ; shift += 7 { @@ -3124,7 +3813,7 @@ func (m *Tester) Unmarshal(dAtA []byte) error { if postIndex > l { return io.ErrUnexpectedEOF } - m.FailureCases = append(m.FailureCases, string(dAtA[iNdEx:postIndex])) + m.Cases = append(m.Cases, string(dAtA[iNdEx:postIndex])) iNdEx = postIndex case 34: if wireType != 2 { @@ -3215,7 +3904,7 @@ func (m *Tester) Unmarshal(dAtA []byte) error { iNdEx = postIndex case 101: if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field StressTypes", wireType) + return fmt.Errorf("proto: wrong wireType = %d for field Stressers", wireType) } var stringLen uint64 for shift := uint(0); ; shift += 7 { @@ -3240,9 +3929,38 @@ func (m *Tester) Unmarshal(dAtA []byte) error { if postIndex > l { return io.ErrUnexpectedEOF } - m.StressTypes = append(m.StressTypes, string(dAtA[iNdEx:postIndex])) + m.Stressers = append(m.Stressers, string(dAtA[iNdEx:postIndex])) iNdEx = 
postIndex case 102: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Checkers", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Checkers = append(m.Checkers, string(dAtA[iNdEx:postIndex])) + iNdEx = postIndex + case 201: if wireType != 0 { return fmt.Errorf("proto: wrong wireType = %d for field StressKeySize", wireType) } @@ -3261,7 +3979,7 @@ func (m *Tester) Unmarshal(dAtA []byte) error { break } } - case 103: + case 202: if wireType != 0 { return fmt.Errorf("proto: wrong wireType = %d for field StressKeySizeLarge", wireType) } @@ -3280,7 +3998,7 @@ func (m *Tester) Unmarshal(dAtA []byte) error { break } } - case 104: + case 203: if wireType != 0 { return fmt.Errorf("proto: wrong wireType = %d for field StressKeySuffixRange", wireType) } @@ -3299,7 +4017,7 @@ func (m *Tester) Unmarshal(dAtA []byte) error { break } } - case 105: + case 204: if wireType != 0 { return fmt.Errorf("proto: wrong wireType = %d for field StressKeySuffixRangeTxn", wireType) } @@ -3318,7 +4036,7 @@ func (m *Tester) Unmarshal(dAtA []byte) error { break } } - case 106: + case 205: if wireType != 0 { return fmt.Errorf("proto: wrong wireType = %d for field StressKeyTxnOps", wireType) } @@ -3337,7 +4055,7 @@ func (m *Tester) Unmarshal(dAtA []byte) error { break } } - case 201: + case 301: if wireType != 0 { return fmt.Errorf("proto: wrong wireType = %d for field StressClients", wireType) } @@ -3356,7 +4074,7 @@ func (m *Tester) Unmarshal(dAtA []byte) error { break } } - case 202: + case 302: if wireType != 0 { return fmt.Errorf("proto: wrong wireType = %d for field StressQPS", wireType) } @@ -4214,162 +4932,181 @@ var ( func init() { proto.RegisterFile("rpcpb/rpc.proto", fileDescriptorRpc) } var fileDescriptorRpc = []byte{ - // 2512 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x59, 0xdb, 0x76, 0xdb, 0xc6, - 0xd5, 0x36, 0x44, 0xcb, 0x8e, 0x86, 0x3a, 0x50, 0xa3, 0x83, 0x61, 0xd9, 0x16, 0x64, 0x38, 0xce, - 0x6f, 0x2b, 0x3f, 0xe4, 0xd4, 0xce, 0x4a, 0x1b, 0xe7, 0xe0, 0x80, 0x14, 0x2c, 0xb1, 0x82, 0x48, - 0x7a, 0x08, 0xd9, 0xc9, 0x15, 0x0b, 0x91, 0x23, 0x09, 0x35, 0x05, 0x30, 0xc0, 0xd0, 0x91, 0xf2, - 0x02, 0xbd, 0xed, 0x79, 0xf5, 0xaa, 0x4f, 0xd0, 0xf4, 0x39, 0x9c, 0xf4, 0x94, 0xb6, 0xf7, 0x5c, - 0xad, 0xf3, 0x06, 0x5c, 0x3d, 0xa5, 0x57, 0x5d, 0x73, 0x80, 0x38, 0x00, 0x48, 0x49, 0x77, 0xc2, - 0xde, 0xdf, 0xf7, 0xcd, 0x9e, 0xd9, 0x33, 0x7b, 0x6f, 0x49, 0x60, 0x26, 0xec, 0x34, 0x3b, 0xbb, - 0xf7, 0xc2, 0x4e, 0x73, 0xad, 0x13, 0x06, 0x24, 0x80, 0xe3, 0xcc, 0xb0, 0x64, 0xec, 0x7b, 0xe4, - 0xa0, 0xbb, 0xbb, 0xd6, 0x0c, 0x0e, 0xef, 0xed, 0x07, 0xfb, 0xc1, 0x3d, 0xe6, 0xdd, 0xed, 0xee, - 0xb1, 0x2f, 0xf6, 0xc1, 0x7e, 0xe2, 0x2c, 0xfd, 0x47, 0x0a, 0xb8, 0x8c, 0xf0, 0xa7, 0x5d, 0x1c, - 0x11, 0xb8, 0x06, 0x26, 0xaa, 0x1d, 0x1c, 0xba, 0xc4, 0x0b, 0x7c, 0x55, 0x59, 0x51, 0xee, 0x4c, - 0xdf, 0x2f, 0xac, 0x31, 0xd5, 0xb5, 0x13, 0x3b, 0x1a, 0x40, 0xe0, 0x6d, 0x70, 0x69, 0x1b, 0x1f, - 0xee, 0xe2, 0x50, 0x1d, 0x5b, 0x51, 0xee, 0xe4, 0xef, 0x4f, 0x09, 0x30, 0x37, 0x22, 0xe1, 0xa4, - 0x30, 0x07, 0x47, 0x04, 0x87, 0x6a, 0x2e, 0x01, 0xe3, 0x46, 
0x24, 0x9c, 0x7a, 0x13, 0xbc, 0x86, - 0x70, 0xd4, 0x09, 0xfc, 0x08, 0x43, 0x15, 0x5c, 0xae, 0x77, 0x9b, 0x4d, 0x1c, 0x45, 0x2c, 0x8e, - 0xd7, 0x50, 0xfc, 0x09, 0x17, 0xc1, 0xa5, 0x3a, 0x71, 0x49, 0x37, 0x62, 0x6b, 0x4e, 0x20, 0xf1, - 0x25, 0xc5, 0x92, 0x3b, 0x25, 0x16, 0xfd, 0xd7, 0xf9, 0x18, 0x07, 0x3f, 0x00, 0x93, 0x16, 0x69, - 0xb6, 0xac, 0x23, 0xdc, 0xac, 0xb9, 0xe4, 0x80, 0x2d, 0x34, 0x51, 0xbc, 0xda, 0xef, 0x69, 0x0b, - 0xc7, 0xee, 0x61, 0xfb, 0xa1, 0x8e, 0x49, 0xb3, 0x65, 0xe0, 0x23, 0xdc, 0x34, 0x3a, 0x2e, 0x39, - 0xd0, 0x51, 0x02, 0x0e, 0x1f, 0x80, 0x09, 0x73, 0x1f, 0xfb, 0xc4, 0x6c, 0xb5, 0x42, 0x35, 0xcf, - 0xb8, 0x0b, 0xfd, 0x9e, 0x36, 0xcb, 0xb9, 0x2e, 0x75, 0x19, 0x6e, 0xab, 0x15, 0xea, 0x68, 0x80, - 0x83, 0x36, 0x98, 0x7d, 0xec, 0x7a, 0xed, 0x4e, 0xe0, 0xf9, 0x64, 0xd3, 0x71, 0x6a, 0x8c, 0x3c, - 0xc9, 0xc8, 0xcb, 0xfd, 0x9e, 0xb6, 0xc4, 0xc9, 0x7b, 0x31, 0xc4, 0x38, 0x20, 0xa4, 0x23, 0x54, - 0xb2, 0x44, 0x68, 0x80, 0xcb, 0x45, 0x37, 0xc2, 0xeb, 0x5e, 0xa8, 0x62, 0xa6, 0x31, 0xd7, 0xef, - 0x69, 0x33, 0x5c, 0x63, 0xd7, 0x8d, 0xb0, 0xd1, 0xf2, 0x42, 0x1d, 0xc5, 0x18, 0xf8, 0x10, 0xe4, - 0xe9, 0x0e, 0xec, 0x60, 0x9f, 0xed, 0x77, 0x8f, 0x51, 0xd4, 0x7e, 0x4f, 0x9b, 0x97, 0xf6, 0xdb, - 0x0e, 0xf6, 0xc5, 0x76, 0x65, 0x30, 0xdc, 0x00, 0x33, 0xf4, 0xb3, 0xd4, 0xf6, 0xb0, 0x4f, 0x6a, - 0x61, 0x70, 0x74, 0xac, 0x7e, 0xc9, 0x32, 0x53, 0xbc, 0xde, 0xef, 0x69, 0xaa, 0x24, 0xd0, 0x64, - 0x10, 0xa3, 0x43, 0x31, 0x3a, 0x4a, 0xb3, 0xa0, 0x09, 0xa6, 0xa8, 0xa9, 0x86, 0x71, 0xc8, 0x65, - 0xbe, 0xe2, 0x32, 0x4b, 0xfd, 0x9e, 0xb6, 0x28, 0xc9, 0x74, 0x30, 0x0e, 0x63, 0x91, 0x24, 0x03, - 0xd6, 0x00, 0x1c, 0xa8, 0x5a, 0x7e, 0x8b, 0x1d, 0x8a, 0xfa, 0x05, 0xbb, 0x0f, 0x45, 0xad, 0xdf, - 0xd3, 0xae, 0x65, 0xc3, 0xc1, 0x02, 0xa6, 0xa3, 0x21, 0x5c, 0xf8, 0x1d, 0x70, 0x91, 0x5a, 0xd5, - 0xdf, 0xf2, 0x7b, 0x9c, 0x17, 0x77, 0x87, 0xda, 0x8a, 0x33, 0xfd, 0x9e, 0x96, 0x1f, 0x08, 0xea, - 0x88, 0x41, 0xe1, 0x3a, 0x98, 0xe6, 0x22, 0x25, 0x1c, 0x92, 0x75, 0x97, 0xb8, 0xea, 0x4f, 0x72, - 0x2c, 0x80, 0x6b, 0xfd, 0x9e, 0x76, 0x85, 0xe3, 0xc5, 0xda, 0x4d, 0x1c, 0x12, 0xa3, 0xe5, 0x12, - 0x57, 0x47, 0x29, 0x4e, 0x52, 0x85, 0x65, 0xe5, 0xa7, 0xa7, 0xaa, 0xf0, 0xcc, 0xa4, 0x38, 0xf4, - 0x4c, 0xb9, 0x65, 0x0b, 0x1f, 0xb3, 0x50, 0x7e, 0xc6, 0x45, 0xa4, 0x33, 0x15, 0x22, 0xcf, 0xf1, - 0xb1, 0x88, 0x24, 0xc9, 0x48, 0x48, 0xb0, 0x38, 0x7e, 0x7e, 0x9a, 0x04, 0x0f, 0x23, 0xc9, 0x80, - 0x0e, 0x98, 0xe3, 0x06, 0x27, 0xec, 0x46, 0x04, 0xb7, 0x4a, 0x26, 0x8b, 0xe5, 0x17, 0x5c, 0xe8, - 0x66, 0xbf, 0xa7, 0xdd, 0x48, 0x08, 0x11, 0x0e, 0x33, 0x9a, 0xae, 0x08, 0x69, 0x18, 0x7d, 0x88, - 0x2a, 0x0b, 0xef, 0x97, 0xe7, 0x50, 0xe5, 0x51, 0x0e, 0xa3, 0xc3, 0x0f, 0xc1, 0x24, 0xbd, 0x4f, - 0x27, 0xb9, 0xfb, 0x67, 0x2e, 0xfd, 0xf8, 0xd9, 0xfd, 0x93, 0x32, 0x97, 0xc0, 0xcb, 0x7c, 0x16, - 0xce, 0xbf, 0x4e, 0xe1, 0x8b, 0xe2, 0x21, 0xe3, 0xe1, 0x7b, 0x20, 0x4f, 0xbf, 0xe3, 0x7c, 0xfd, - 0x3b, 0x97, 0x7e, 0x8b, 0x8c, 0x3e, 0xc8, 0x96, 0x8c, 0x96, 0xc8, 0x6c, 0xed, 0xff, 0x8c, 0x26, - 0x8b, 0x87, 0x2c, 0xa1, 0x61, 0x05, 0xcc, 0xd2, 0xcf, 0x64, 0x8e, 0xbe, 0xcd, 0xa5, 0xdf, 0x0e, - 0x93, 0xc8, 0x64, 0x28, 0x4b, 0xcd, 0xe8, 0xb1, 0x90, 0xfe, 0x7b, 0xa6, 0x1e, 0x8f, 0x2c, 0x4b, - 0xd5, 0xbf, 0x99, 0x8c, 0xbb, 0x05, 0x2d, 0x6f, 0x74, 0x09, 0x5a, 0xde, 0x94, 0x74, 0x79, 0xa3, - 0xf1, 0x88, 0xf2, 0x26, 0x30, 0xf0, 0xff, 0xc1, 0xe5, 0x0a, 0x26, 0x9f, 0x05, 0xe1, 0x73, 0xde, - 0x1a, 0x8a, 0xb0, 0xdf, 0xd3, 0xa6, 0x39, 0xdc, 0xe7, 0x0e, 0x1d, 0xc5, 0x10, 0x78, 0x0b, 0x5c, - 0x64, 0xc5, 0x97, 0x47, 0x2a, 0x3d, 0x72, 0x5e, 0x6d, 0x99, 0x13, 0x96, 0xc0, 0xf4, 
0x3a, 0x6e, - 0xbb, 0xc7, 0xb6, 0x4b, 0xb0, 0xdf, 0x3c, 0xde, 0x8e, 0x58, 0xa1, 0x9f, 0x92, 0x5f, 0x67, 0x8b, - 0xfa, 0x8d, 0x36, 0x07, 0x18, 0x87, 0x91, 0x8e, 0x52, 0x14, 0xf8, 0x7d, 0x50, 0x48, 0x5a, 0xd0, - 0x0b, 0x56, 0xf2, 0xa7, 0xe4, 0x92, 0x9f, 0x96, 0x31, 0xc2, 0x17, 0x3a, 0xca, 0xf0, 0xe0, 0x27, - 0x60, 0x61, 0xa7, 0xd3, 0x72, 0x09, 0x6e, 0xa5, 0xe2, 0x9a, 0x62, 0x82, 0xb7, 0xfa, 0x3d, 0x4d, - 0xe3, 0x82, 0x5d, 0x0e, 0x33, 0xb2, 0xf1, 0x0d, 0x57, 0x80, 0xef, 0x00, 0x80, 0x82, 0xae, 0xdf, - 0xb2, 0xbd, 0x43, 0x8f, 0xa8, 0x0b, 0x2b, 0xca, 0x9d, 0xf1, 0xe2, 0x62, 0xbf, 0xa7, 0x41, 0xae, - 0x17, 0x52, 0x9f, 0xd1, 0xa6, 0x4e, 0x1d, 0x49, 0x48, 0xf8, 0x11, 0x98, 0xb2, 0x8e, 0x3c, 0x52, - 0xf5, 0x69, 0x7f, 0xea, 0x86, 0x58, 0x5d, 0xcc, 0xd4, 0xf3, 0x23, 0x8f, 0x18, 0x81, 0x6f, 0xec, - 0x71, 0x00, 0xad, 0xe7, 0x32, 0x01, 0x6e, 0x82, 0x42, 0x29, 0xf0, 0x23, 0x2f, 0x62, 0xa1, 0x94, - 0x0e, 0x70, 0xf3, 0xb9, 0x7a, 0x25, 0xdd, 0x5b, 0x9a, 0x03, 0x84, 0xd1, 0xa4, 0x10, 0x1d, 0x65, - 0x58, 0xf0, 0x5d, 0x90, 0xb7, 0x7c, 0x77, 0xb7, 0x8d, 0x6b, 0x9d, 0x30, 0xd8, 0x53, 0x55, 0x26, - 0x72, 0xa5, 0xdf, 0xd3, 0xe6, 0x44, 0x24, 0xcc, 0x69, 0x74, 0xa8, 0x97, 0x36, 0xb8, 0x01, 0x96, - 0xa6, 0x5a, 0xc4, 0xc3, 0xce, 0x65, 0x3b, 0x52, 0xb5, 0x74, 0xaa, 0x45, 0xfc, 0xe2, 0x48, 0x59, - 0xaa, 0x93, 0x14, 0x58, 0x3c, 0x11, 0xa9, 0x1f, 0x74, 0xf7, 0xf6, 0xda, 0x58, 0x5d, 0x49, 0x1f, - 0x46, 0x2c, 0x12, 0x71, 0xc0, 0x40, 0x43, 0x30, 0xe0, 0xfb, 0x60, 0x52, 0x58, 0x4a, 0x6e, 0x84, - 0x23, 0xf5, 0xe6, 0x4a, 0x2e, 0xf9, 0xba, 0x63, 0x85, 0x26, 0x75, 0xeb, 0x28, 0x81, 0x86, 0x5b, - 0xd2, 0x80, 0x51, 0x0a, 0x0e, 0x0f, 0x5d, 0xbf, 0x15, 0xa9, 0x3a, 0x93, 0xb8, 0xd1, 0xef, 0x69, - 0x57, 0xd3, 0x03, 0x46, 0x53, 0x60, 0xe4, 0xf9, 0x22, 0xe6, 0xd1, 0x33, 0x41, 0x5d, 0xdf, 0xc7, - 0xe1, 0xc9, 0x8c, 0x74, 0x37, 0xdd, 0x9c, 0x42, 0xe6, 0x97, 0xa7, 0xa4, 0x14, 0x05, 0x96, 0x41, - 0xc1, 0x3a, 0x22, 0x38, 0xf4, 0xdd, 0xf6, 0x89, 0xcc, 0x2a, 0x93, 0x91, 0x02, 0xc2, 0x02, 0x21, - 0x0b, 0x65, 0x68, 0x34, 0xbd, 0x75, 0x12, 0xe2, 0x28, 0x72, 0x8e, 0x3b, 0x38, 0x52, 0x31, 0xdb, - 0x96, 0x94, 0xde, 0x88, 0x39, 0x0d, 0x42, 0xbd, 0x3a, 0x92, 0xb1, 0xf4, 0x96, 0xf2, 0xcf, 0x2d, - 0x7c, 0x5c, 0xf7, 0x3e, 0xc7, 0x6c, 0xfa, 0x19, 0x97, 0x13, 0x23, 0xc8, 0xb4, 0x6c, 0x46, 0xde, - 0xe7, 0xf4, 0x96, 0x26, 0x08, 0x74, 0xea, 0x48, 0x18, 0x6c, 0x37, 0xdc, 0xc7, 0xea, 0x3e, 0x93, - 0x59, 0xe9, 0xf7, 0xb4, 0xeb, 0x43, 0x65, 0x8c, 0x36, 0x85, 0xe9, 0x68, 0x08, 0x17, 0x3e, 0x05, - 0xf3, 0x03, 0x6b, 0x77, 0x6f, 0xcf, 0x3b, 0x42, 0xae, 0xbf, 0x8f, 0xd5, 0x03, 0xa6, 0xa9, 0xf7, - 0x7b, 0xda, 0x72, 0x56, 0x93, 0xe1, 0x8c, 0x90, 0x02, 0x75, 0x34, 0x94, 0x0f, 0x7f, 0x00, 0xae, - 0x0c, 0xb3, 0x3b, 0x47, 0xbe, 0xea, 0x31, 0xe9, 0x37, 0xfa, 0x3d, 0x4d, 0x3f, 0x55, 0xda, 0x20, - 0x47, 0xbe, 0x8e, 0x46, 0xc9, 0xd0, 0x69, 0xf0, 0xc4, 0xe5, 0x1c, 0xf9, 0xd5, 0x4e, 0xa4, 0xfe, - 0x90, 0x29, 0x4b, 0x29, 0x95, 0x94, 0xc9, 0x91, 0x6f, 0x04, 0x9d, 0x48, 0x47, 0x69, 0xd6, 0x20, - 0x2d, 0xbc, 0x49, 0x47, 0x7c, 0xa8, 0x1c, 0x97, 0x1b, 0xa9, 0xd0, 0xe1, 0xed, 0x3d, 0x3a, 0x49, - 0x8b, 0x20, 0xc0, 0xb7, 0xc1, 0x04, 0x37, 0x3c, 0xa9, 0xd5, 0xf9, 0x2c, 0x39, 0x2e, 0xcf, 0xe1, - 0x82, 0xfd, 0x29, 0x5d, 0x7d, 0x00, 0xd4, 0xbf, 0x9d, 0xe2, 0x13, 0x1f, 0x6d, 0x03, 0x15, 0xf7, - 0x10, 0x8b, 0x06, 0x23, 0xb5, 0x01, 0xdf, 0x3d, 0xc4, 0x3a, 0x62, 0x4e, 0xb9, 0x11, 0x8d, 0x9d, - 0xa3, 0x11, 0xad, 0x82, 0x4b, 0xcf, 0x4c, 0x9b, 0xa2, 0x73, 0xe9, 0x3e, 0xf4, 0x99, 0xdb, 0xe6, - 0x60, 0x81, 0x80, 0x55, 0x30, 0xb7, 0x89, 0xdd, 0x90, 0xec, 0x62, 0x97, 0x94, 0x7d, 0x82, 0xc3, - 0x17, 0x6e, 
0x5b, 0xb4, 0x99, 0x9c, 0x7c, 0x9a, 0x07, 0x31, 0xc8, 0xf0, 0x04, 0x4a, 0x47, 0xc3, - 0x98, 0xb0, 0x0c, 0x66, 0xad, 0x36, 0x6e, 0xd2, 0xdf, 0xcf, 0x1c, 0xef, 0x10, 0x07, 0x5d, 0xb2, - 0x1d, 0xb1, 0x76, 0x93, 0x93, 0x9f, 0x2d, 0x16, 0x10, 0x83, 0x70, 0x8c, 0x8e, 0xb2, 0x2c, 0xfa, - 0x72, 0x6d, 0x56, 0x5e, 0xf9, 0x59, 0xef, 0x20, 0x3b, 0x52, 0x17, 0xd2, 0xa5, 0xa4, 0xcd, 0x10, - 0xf1, 0x98, 0xdd, 0x0d, 0xdb, 0x91, 0x8e, 0x32, 0x34, 0x88, 0xc0, 0x9c, 0xd9, 0x7a, 0x81, 0x43, - 0xe2, 0x45, 0x58, 0x52, 0x5b, 0x64, 0x6a, 0xd2, 0xeb, 0x71, 0x63, 0x50, 0x52, 0x70, 0x18, 0x19, - 0xbe, 0x1b, 0x8f, 0xac, 0x66, 0x97, 0x04, 0x8e, 0x5d, 0x17, 0x3d, 0x43, 0xca, 0x8d, 0xdb, 0x25, - 0x81, 0x41, 0xa8, 0x40, 0x12, 0x49, 0x0b, 0xdb, 0x60, 0x84, 0x36, 0xbb, 0xe4, 0x40, 0xb4, 0x8a, - 0x11, 0x53, 0xb7, 0xdb, 0x4d, 0x4d, 0xdd, 0x94, 0x02, 0xdf, 0x97, 0x45, 0x1e, 0x7b, 0x6d, 0xac, - 0x5e, 0x65, 0xe9, 0x9e, 0xef, 0xf7, 0xb4, 0x82, 0x10, 0xa1, 0xec, 0x3d, 0x8f, 0x95, 0xf9, 0x24, - 0x76, 0x10, 0xfd, 0x16, 0x3e, 0x66, 0xe4, 0xa5, 0xf4, 0xcd, 0xa2, 0x2f, 0x87, 0x73, 0x93, 0x48, - 0x68, 0x67, 0x46, 0x62, 0x26, 0x70, 0x2d, 0x3d, 0xb0, 0x4b, 0xe3, 0x16, 0xd7, 0x19, 0x46, 0xa3, - 0x67, 0xc1, 0xd3, 0x45, 0x67, 0x31, 0x96, 0x15, 0x8d, 0x65, 0x45, 0x3a, 0x0b, 0x91, 0x63, 0x36, - 0xc3, 0xf1, 0x84, 0xa4, 0x28, 0xd0, 0x01, 0xb3, 0x27, 0x29, 0x3a, 0xd1, 0x59, 0x61, 0x3a, 0x52, - 0xb1, 0xf1, 0x7c, 0x8f, 0x78, 0x6e, 0xdb, 0x18, 0x64, 0x59, 0x92, 0xcc, 0x0a, 0xd0, 0x5f, 0x58, - 0xe9, 0xcf, 0x71, 0x7e, 0x6f, 0xb2, 0x1c, 0xa5, 0xe7, 0xdc, 0x41, 0x92, 0x65, 0x30, 0x2d, 0xd7, - 0x6c, 0xe2, 0x4e, 0xa6, 0x59, 0x67, 0x12, 0xd2, 0x85, 0xe3, 0x63, 0x7a, 0x26, 0xd7, 0x43, 0xb8, - 0xf0, 0x83, 0xc1, 0xcc, 0xcf, 0xce, 0xfb, 0xd6, 0xe8, 0x91, 0x9f, 0x1f, 0x77, 0x02, 0x1e, 0x6f, - 0x26, 0x4e, 0xf7, 0xeb, 0x23, 0x87, 0x76, 0x4e, 0x96, 0xc1, 0x70, 0x3b, 0x35, 0x64, 0x33, 0x85, - 0xdb, 0x67, 0xcd, 0xd8, 0x5c, 0x28, 0xcb, 0xa4, 0x63, 0x4a, 0x99, 0xa7, 0xa2, 0xd4, 0xee, 0xb2, - 0x3f, 0xcc, 0xdc, 0x4d, 0xdf, 0x9d, 0x38, 0x55, 0x4d, 0x0e, 0xd0, 0x51, 0x8a, 0x41, 0x5f, 0x74, - 0xd2, 0x52, 0x27, 0x2e, 0xc1, 0xa2, 0xb3, 0x4b, 0x07, 0x9c, 0x12, 0x32, 0x22, 0x0a, 0xd3, 0xd1, - 0x30, 0x72, 0x56, 0xd3, 0x09, 0x9e, 0x63, 0x5f, 0x7d, 0xf3, 0x2c, 0x4d, 0x42, 0x61, 0x19, 0x4d, - 0x46, 0x86, 0x8f, 0xc0, 0x54, 0xdd, 0x77, 0x3b, 0xd1, 0x41, 0x40, 0x4a, 0x41, 0xd7, 0x27, 0xea, - 0x03, 0x56, 0x0b, 0xe5, 0x06, 0x23, 0xdc, 0x46, 0x93, 0xfa, 0x69, 0x83, 0x91, 0xf1, 0xd0, 0x06, - 0xb3, 0x4f, 0xba, 0x01, 0x71, 0x8b, 0x6e, 0xf3, 0x39, 0xf6, 0x5b, 0xc5, 0x63, 0x82, 0x23, 0xf5, - 0x6d, 0x26, 0x22, 0xcd, 0xef, 0x9f, 0x52, 0x88, 0xb1, 0xcb, 0x31, 0xc6, 0x2e, 0x05, 0xe9, 0x28, - 0x4b, 0xa4, 0xad, 0xa4, 0x16, 0xe2, 0xa7, 0x01, 0xc1, 0xea, 0xa3, 0x74, 0xb9, 0xea, 0x84, 0xd8, - 0x78, 0x11, 0xd0, 0xd3, 0x89, 0x31, 0xf2, 0x89, 0x04, 0x61, 0xd8, 0xed, 0x10, 0x3e, 0x1d, 0x7f, - 0x94, 0xbe, 0xc6, 0x27, 0x27, 0xc2, 0x51, 0xf1, 0x84, 0x3c, 0x8c, 0xbc, 0xfa, 0x72, 0x4c, 0xfa, - 0x33, 0x1f, 0x9c, 0x01, 0xf9, 0x4a, 0xd5, 0x69, 0xd4, 0x1d, 0x13, 0x39, 0xd6, 0x7a, 0xe1, 0x02, - 0x5c, 0x04, 0xb0, 0x5c, 0x29, 0x3b, 0x65, 0xd3, 0xe6, 0xc6, 0x86, 0xe5, 0x94, 0xd6, 0x0b, 0x00, - 0x16, 0xc0, 0x24, 0xb2, 0x24, 0x4b, 0x9e, 0x5a, 0xea, 0xe5, 0x0d, 0xc7, 0x42, 0xdb, 0xdc, 0x32, - 0x0f, 0x57, 0xc0, 0xf5, 0x7a, 0x79, 0xe3, 0xc9, 0x4e, 0x99, 0x63, 0x1a, 0x66, 0x65, 0xbd, 0x81, - 0xac, 0xed, 0xea, 0x53, 0xab, 0xb1, 0x6e, 0x3a, 0x66, 0x61, 0x01, 0xde, 0x04, 0x37, 0x32, 0x08, - 0x13, 0x95, 0x36, 0xcb, 0x31, 0x64, 0x19, 0xde, 0x03, 0x6f, 0x9e, 0x26, 0xc2, 0xbe, 0xeb, 0x4e, - 0xb5, 0xd6, 0x30, 0x37, 0xac, 0x8a, 
0x53, 0xd0, 0xe0, 0x0d, 0x70, 0xb5, 0x68, 0x9b, 0xa5, 0xad, - 0xcd, 0xaa, 0x6d, 0x35, 0x6a, 0x96, 0x85, 0x1a, 0xb5, 0x2a, 0x72, 0x1a, 0xce, 0xc7, 0x0d, 0xf4, - 0x71, 0xa1, 0x05, 0x35, 0x70, 0x6d, 0xa7, 0x32, 0x1a, 0x80, 0xe1, 0x12, 0x58, 0x58, 0xb7, 0x6c, - 0xf3, 0x93, 0x8c, 0xeb, 0xa5, 0x02, 0xaf, 0x83, 0x2b, 0x3b, 0x95, 0xe1, 0xde, 0x2f, 0x95, 0xd5, - 0x2f, 0x00, 0xc8, 0x4b, 0xe3, 0x37, 0x54, 0xc1, 0x7c, 0x7c, 0x22, 0xd5, 0x8a, 0xd5, 0x78, 0x5c, - 0xb5, 0xed, 0xea, 0x33, 0x0b, 0x15, 0x2e, 0x88, 0x4d, 0x65, 0x3c, 0x8d, 0x9d, 0x8a, 0x53, 0xb6, - 0x1b, 0x0e, 0x2a, 0x6f, 0x6c, 0x58, 0xa8, 0x51, 0xaf, 0x98, 0xb5, 0xfa, 0x66, 0xd5, 0x29, 0x28, - 0x10, 0x82, 0xe9, 0x98, 0x60, 0x5b, 0xe6, 0xba, 0x85, 0x0a, 0x63, 0xf0, 0x2e, 0xb8, 0x9d, 0xb4, - 0x8d, 0xa2, 0xe7, 0x64, 0xfa, 0x93, 0x9d, 0x2a, 0xda, 0xd9, 0x2e, 0x5c, 0xa4, 0xa9, 0x8e, 0x6d, - 0xa6, 0x6d, 0x17, 0xc6, 0xe1, 0x2d, 0xa0, 0xc5, 0x27, 0x2d, 0x1d, 0x72, 0x22, 0x72, 0x00, 0x1f, - 0x82, 0x77, 0xce, 0x00, 0x8d, 0x8a, 0x22, 0x4f, 0x33, 0x33, 0x84, 0x2b, 0xf6, 0x33, 0x09, 0xdf, - 0x06, 0x6f, 0x8d, 0x74, 0x8f, 0x12, 0x9d, 0x82, 0xab, 0xe0, 0x8d, 0x91, 0xd9, 0x4c, 0x06, 0xdf, - 0x82, 0x26, 0xf8, 0xe0, 0x7c, 0xd8, 0x51, 0xcb, 0x61, 0xf8, 0x3a, 0x58, 0x19, 0x2d, 0x21, 0xb6, - 0xb2, 0x07, 0xdf, 0x03, 0xdf, 0x3d, 0x0b, 0x35, 0x6a, 0x89, 0xfd, 0xd3, 0x97, 0x10, 0xe9, 0x3b, - 0xa0, 0x4f, 0x67, 0x34, 0x8a, 0x26, 0xd4, 0x83, 0xff, 0x07, 0xf4, 0xa1, 0x77, 0x35, 0x79, 0x2c, - 0x2f, 0x15, 0xb8, 0x06, 0xee, 0x22, 0xb3, 0xb2, 0x5e, 0xdd, 0x6e, 0x9c, 0x03, 0xff, 0xa5, 0x02, - 0x3f, 0x04, 0xef, 0x9e, 0x0d, 0x1c, 0xb5, 0xc1, 0xaf, 0x14, 0x68, 0x81, 0x8f, 0xce, 0xbd, 0xde, - 0x28, 0x99, 0xdf, 0x29, 0xf0, 0x26, 0xb8, 0x3e, 0x9c, 0x2f, 0xf2, 0xf0, 0x7b, 0x05, 0xde, 0x01, - 0xb7, 0x4e, 0x5d, 0x49, 0x20, 0xff, 0xa0, 0xc0, 0xef, 0x81, 0x07, 0xa7, 0x41, 0x46, 0x85, 0xf1, - 0x47, 0x05, 0x3e, 0x02, 0x0f, 0xcf, 0xb1, 0xc6, 0x28, 0x81, 0x3f, 0x9d, 0xb2, 0x0f, 0x91, 0xec, - 0xaf, 0xcf, 0xde, 0x87, 0x40, 0xfe, 0x59, 0x81, 0xcb, 0xe0, 0xea, 0x70, 0x08, 0xbd, 0x13, 0x7f, - 0x51, 0xe0, 0x6d, 0xb0, 0x72, 0xaa, 0x12, 0x85, 0xfd, 0x55, 0x81, 0x2a, 0x98, 0xab, 0x54, 0x1b, - 0x8f, 0xcd, 0xb2, 0xdd, 0x78, 0x56, 0x76, 0x36, 0x1b, 0x75, 0x07, 0x59, 0xf5, 0x7a, 0xe1, 0x37, - 0x63, 0x34, 0x94, 0x84, 0xa7, 0x52, 0x15, 0xce, 0xc6, 0xe3, 0x2a, 0x6a, 0xd8, 0xe5, 0xa7, 0x56, - 0x85, 0x22, 0xbf, 0x18, 0x83, 0x33, 0x00, 0x50, 0x58, 0xad, 0x5a, 0xae, 0x38, 0xf5, 0xc2, 0x8f, - 0x73, 0x70, 0x0a, 0xbc, 0x66, 0x7d, 0xec, 0x58, 0xa8, 0x62, 0xda, 0x85, 0x7f, 0xe4, 0x56, 0x03, - 0x00, 0x06, 0xbf, 0x94, 0xc3, 0x4b, 0x60, 0x6c, 0xeb, 0x69, 0xe1, 0x02, 0x9c, 0x00, 0xe3, 0xb6, - 0x65, 0xd6, 0xad, 0x82, 0x02, 0xe7, 0xc0, 0x8c, 0x65, 0x5b, 0x25, 0xa7, 0x5c, 0xad, 0x34, 0xd0, - 0x4e, 0xa5, 0xc2, 0xca, 0x5e, 0x01, 0x4c, 0x3e, 0x33, 0x9d, 0xd2, 0x66, 0x6c, 0xc9, 0xc1, 0x05, - 0x30, 0x6b, 0x57, 0x4b, 0x5b, 0x0d, 0x64, 0x96, 0x2c, 0x14, 0x9b, 0x2f, 0x52, 0x20, 0x13, 0x8a, - 0x2d, 0xe3, 0xf7, 0x1f, 0x81, 0x09, 0x27, 0x74, 0xfd, 0xa8, 0x13, 0x84, 0x04, 0xde, 0x97, 0x3f, - 0xa6, 0xc5, 0xdf, 0xf8, 0xc5, 0xff, 0xbe, 0x96, 0x66, 0x4e, 0xbe, 0xf9, 0xbf, 0xa0, 0xf4, 0x0b, - 0x77, 0x94, 0xb7, 0x94, 0xe2, 0xfc, 0xcb, 0xbf, 0x2f, 0x5f, 0x78, 0xf9, 0x6a, 0x59, 0xf9, 0xfa, - 0xd5, 0xb2, 0xf2, 0xb7, 0x57, 0xcb, 0xca, 0xaf, 0xbe, 0x59, 0xbe, 0xb0, 0x7b, 0x89, 0xfd, 0xef, - 0xec, 0xc1, 0xff, 0x02, 0x00, 0x00, 0xff, 0xff, 0x86, 0xcd, 0x31, 0x78, 0x84, 0x1b, 0x00, 0x00, + // 2808 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x59, 0xdb, 0x73, 0xdb, 0xc6, + 0xf5, 0x16, 
0x44, 0x5d, 0x57, 0x37, 0x68, 0x65, 0xd9, 0xf0, 0x4d, 0x90, 0xe1, 0x38, 0x3f, 0x59, + 0x09, 0xec, 0xfc, 0xec, 0x4c, 0x2e, 0x4e, 0x13, 0x07, 0xa4, 0x20, 0x8b, 0x15, 0x44, 0xd2, 0x4b, + 0xc8, 0x76, 0x9e, 0x38, 0x10, 0xb9, 0x92, 0x30, 0xa6, 0x00, 0x06, 0x58, 0x2a, 0x52, 0xfe, 0x81, + 0xbe, 0xf6, 0x3e, 0xed, 0x4c, 0x9f, 0xfa, 0xdc, 0xb4, 0xff, 0x86, 0x73, 0x6b, 0xd3, 0xf6, 0xa9, + 0xed, 0x0c, 0xa7, 0x4d, 0x5f, 0xfa, 0xd4, 0x07, 0x4e, 0x6f, 0xe9, 0x53, 0x67, 0x77, 0x01, 0x71, + 0x01, 0x90, 0x92, 0x9e, 0xa4, 0x3d, 0xe7, 0xfb, 0xbe, 0x3d, 0xbb, 0x67, 0xb1, 0xe7, 0x00, 0x04, + 0x73, 0x41, 0xab, 0xde, 0xda, 0xb9, 0x1b, 0xb4, 0xea, 0x77, 0x5a, 0x81, 0x4f, 0x7c, 0x38, 0xca, + 0x0c, 0x57, 0xf4, 0x3d, 0x97, 0xec, 0xb7, 0x77, 0xee, 0xd4, 0xfd, 0x83, 0xbb, 0x7b, 0xfe, 0x9e, + 0x7f, 0x97, 0x79, 0x77, 0xda, 0xbb, 0x6c, 0xc4, 0x06, 0xec, 0x3f, 0xce, 0xd2, 0xbe, 0x23, 0x81, + 0x71, 0x84, 0x3f, 0x6c, 0xe3, 0x90, 0xc0, 0x3b, 0x60, 0xb2, 0xdc, 0xc2, 0x81, 0x43, 0x5c, 0xdf, + 0x53, 0xa4, 0x65, 0x69, 0x65, 0xf6, 0x9e, 0x7c, 0x87, 0xa9, 0xde, 0x39, 0xb1, 0xa3, 0x1e, 0x04, + 0xde, 0x02, 0x63, 0x5b, 0xf8, 0x60, 0x07, 0x07, 0xca, 0xf0, 0xb2, 0xb4, 0x32, 0x75, 0x6f, 0x26, + 0x02, 0x73, 0x23, 0x8a, 0x9c, 0x14, 0x66, 0xe3, 0x90, 0xe0, 0x40, 0xc9, 0x25, 0x60, 0xdc, 0x88, + 0x22, 0xa7, 0xf6, 0xb7, 0x61, 0x30, 0x5d, 0xf5, 0x9c, 0x56, 0xb8, 0xef, 0x93, 0xa2, 0xb7, 0xeb, + 0xc3, 0x25, 0x00, 0xb8, 0x42, 0xc9, 0x39, 0xc0, 0x2c, 0x9e, 0x49, 0x24, 0x58, 0xe0, 0x2a, 0x90, + 0xf9, 0xa8, 0xd0, 0x74, 0xb1, 0x47, 0xb6, 0x91, 0x15, 0x2a, 0xc3, 0xcb, 0xb9, 0x95, 0x49, 0x94, + 0xb1, 0x43, 0xad, 0xa7, 0x5d, 0x71, 0xc8, 0x3e, 0x8b, 0x64, 0x12, 0x25, 0x6c, 0x54, 0x2f, 0x1e, + 0xaf, 0xbb, 0x4d, 0x5c, 0x75, 0x3f, 0xc6, 0xca, 0x08, 0xc3, 0x65, 0xec, 0xf0, 0x55, 0x30, 0x1f, + 0xdb, 0x6c, 0x9f, 0x38, 0x4d, 0x06, 0x1e, 0x65, 0xe0, 0xac, 0x43, 0x54, 0x66, 0xc6, 0x4d, 0x7c, + 0xac, 0x8c, 0x2d, 0x4b, 0x2b, 0x39, 0x94, 0xb1, 0x8b, 0x91, 0x6e, 0x38, 0xe1, 0xbe, 0x32, 0xce, + 0x70, 0x09, 0x9b, 0xa8, 0x87, 0xf0, 0xa1, 0x1b, 0xd2, 0x7c, 0x4d, 0x24, 0xf5, 0x62, 0x3b, 0x84, + 0x60, 0xc4, 0xf6, 0xfd, 0xe7, 0xca, 0x24, 0x0b, 0x8e, 0xfd, 0xaf, 0xfd, 0x4c, 0x02, 0x13, 0x08, + 0x87, 0x2d, 0xdf, 0x0b, 0x31, 0x54, 0xc0, 0x78, 0xb5, 0x5d, 0xaf, 0xe3, 0x30, 0x64, 0x7b, 0x3c, + 0x81, 0xe2, 0x21, 0xbc, 0x08, 0xc6, 0xaa, 0xc4, 0x21, 0xed, 0x90, 0xe5, 0x77, 0x12, 0x45, 0x23, + 0x21, 0xef, 0xb9, 0xd3, 0xf2, 0xfe, 0x66, 0x32, 0x9f, 0x6c, 0x2f, 0xa7, 0xee, 0x2d, 0x44, 0x60, + 0xd1, 0x85, 0x12, 0x40, 0xed, 0x4f, 0xd3, 0xf1, 0x04, 0xf0, 0x5d, 0x30, 0x6d, 0x92, 0x7a, 0xc3, + 0x3c, 0xc2, 0x75, 0x96, 0x37, 0x76, 0x0a, 0xf2, 0x97, 0xbb, 0x1d, 0x75, 0xf1, 0xd8, 0x39, 0x68, + 0x3e, 0xd0, 0x30, 0xa9, 0x37, 0x74, 0x7c, 0x84, 0xeb, 0x7a, 0xcb, 0x21, 0xfb, 0x1a, 0x4a, 0xc0, + 0xe1, 0x7d, 0x30, 0x69, 0xec, 0x61, 0x8f, 0x18, 0x8d, 0x46, 0xa0, 0x4c, 0x31, 0xee, 0x62, 0xb7, + 0xa3, 0xce, 0x73, 0xae, 0x43, 0x5d, 0xba, 0xd3, 0x68, 0x04, 0x1a, 0xea, 0xe1, 0xa0, 0x05, 0xe6, + 0xd7, 0x1d, 0xb7, 0xd9, 0xf2, 0x5d, 0x8f, 0x6c, 0xd8, 0x76, 0x85, 0x91, 0xa7, 0x19, 0x79, 0xa9, + 0xdb, 0x51, 0xaf, 0x70, 0xf2, 0x6e, 0x0c, 0xd1, 0xf7, 0x09, 0x69, 0x45, 0x2a, 0x59, 0x22, 0xd4, + 0xc1, 0x78, 0xde, 0x09, 0xf1, 0x9a, 0x1b, 0x28, 0x98, 0x69, 0x2c, 0x74, 0x3b, 0xea, 0x1c, 0xd7, + 0xd8, 0x71, 0x42, 0xac, 0x37, 0xdc, 0x40, 0x43, 0x31, 0x06, 0x3e, 0x00, 0x53, 0x74, 0x05, 0x96, + 0xbf, 0xc7, 0xd6, 0xbb, 0xcb, 0x28, 0x4a, 0xb7, 0xa3, 0x5e, 0x10, 0xd6, 0xdb, 0xf4, 0xf7, 0xa2, + 0xe5, 0x8a, 0x60, 0xf8, 0x08, 0xcc, 0xd1, 0x21, 0x3f, 0xf6, 0x95, 0xc0, 0x3f, 0x3a, 0x56, 0x3e, + 0x65, 0x29, 0xcd, 0x5f, 0xeb, 0x76, 
0x54, 0x45, 0x10, 0xa8, 0x33, 0x88, 0xde, 0xa2, 0x18, 0x0d, + 0xa5, 0x59, 0xd0, 0x00, 0x33, 0xd4, 0x54, 0xc1, 0x38, 0xe0, 0x32, 0x9f, 0x71, 0x99, 0x2b, 0xdd, + 0x8e, 0x7a, 0x51, 0x90, 0x69, 0x61, 0x1c, 0xc4, 0x22, 0x49, 0x06, 0xac, 0x00, 0xd8, 0x53, 0x35, + 0xbd, 0x06, 0xdb, 0x14, 0xe5, 0x13, 0x76, 0x90, 0xf2, 0x6a, 0xb7, 0xa3, 0x5e, 0xcd, 0x86, 0x83, + 0x23, 0x98, 0x86, 0xfa, 0x70, 0xe1, 0xff, 0x83, 0x11, 0x6a, 0x55, 0x7e, 0xc9, 0x2f, 0x9b, 0xa9, + 0xe8, 0x1c, 0x51, 0x5b, 0x7e, 0xae, 0xdb, 0x51, 0xa7, 0x7a, 0x82, 0x1a, 0x62, 0x50, 0x98, 0x07, + 0x8b, 0xf4, 0x6f, 0xd9, 0xeb, 0x3d, 0x15, 0x21, 0xf1, 0x03, 0xac, 0xfc, 0x2a, 0xab, 0x81, 0xfa, + 0x43, 0xe1, 0x1a, 0x98, 0xe5, 0x81, 0x14, 0x70, 0x40, 0xd6, 0x1c, 0xe2, 0x28, 0xdf, 0x63, 0x97, + 0x47, 0xfe, 0x6a, 0xb7, 0xa3, 0x5e, 0xe2, 0x73, 0x46, 0xf1, 0xd7, 0x71, 0x40, 0xf4, 0x86, 0x43, + 0x1c, 0x0d, 0xa5, 0x38, 0x49, 0x15, 0x96, 0xd9, 0xef, 0x9f, 0xaa, 0xc2, 0xb3, 0x9b, 0xe2, 0xd0, + 0xbc, 0x70, 0xcb, 0x26, 0x3e, 0x66, 0xa1, 0xfc, 0x80, 0x8b, 0x08, 0x79, 0x89, 0x44, 0x9e, 0xe3, + 0xe3, 0x28, 0x92, 0x24, 0x23, 0x21, 0xc1, 0xe2, 0xf8, 0xe1, 0x69, 0x12, 0x3c, 0x8c, 0x24, 0x03, + 0xda, 0x60, 0x81, 0x1b, 0xec, 0xa0, 0x1d, 0x12, 0xdc, 0x28, 0x18, 0x2c, 0x96, 0x1f, 0x71, 0xa1, + 0x1b, 0xdd, 0x8e, 0x7a, 0x3d, 0x21, 0x44, 0x38, 0x4c, 0xaf, 0x3b, 0x51, 0x48, 0xfd, 0xe8, 0x7d, + 0x54, 0x59, 0x78, 0x3f, 0x3e, 0x87, 0x2a, 0x8f, 0xb2, 0x1f, 0x1d, 0xbe, 0x07, 0xa6, 0xe9, 0x99, + 0x3c, 0xc9, 0xdd, 0x3f, 0x73, 0xe9, 0x0b, 0x84, 0x9d, 0x61, 0x21, 0x73, 0x09, 0xbc, 0xc8, 0x67, + 0xe1, 0xfc, 0xeb, 0x14, 0x7e, 0x74, 0x01, 0x89, 0x78, 0xf8, 0x0e, 0x98, 0xa2, 0xe3, 0x38, 0x5f, + 0xff, 0xce, 0xa5, 0x9f, 0x67, 0x46, 0xef, 0x65, 0x4b, 0x44, 0x0b, 0x64, 0x36, 0xf7, 0x7f, 0x06, + 0x93, 0xa3, 0xcb, 0x40, 0x40, 0xc3, 0x12, 0x98, 0xa7, 0xc3, 0x64, 0x8e, 0xbe, 0xc9, 0xa5, 0x9f, + 0x3f, 0x26, 0x91, 0xc9, 0x50, 0x96, 0x9a, 0xd1, 0x63, 0x21, 0xfd, 0xf7, 0x4c, 0x3d, 0x1e, 0x59, + 0x96, 0x4a, 0x6f, 0xf6, 0x44, 0x45, 0xfe, 0xc3, 0x48, 0x7a, 0x75, 0x61, 0xe4, 0x8e, 0x37, 0x36, + 0x51, 0xac, 0xdf, 0x4a, 0x15, 0x97, 0x3f, 0x9e, 0xbb, 0xba, 0xfc, 0x7c, 0x3a, 0xee, 0x47, 0xe8, + 0xdd, 0x4c, 0xd7, 0x46, 0xef, 0x66, 0x29, 0x7d, 0x37, 0xd3, 0x8d, 0x88, 0xee, 0xe6, 0x08, 0x03, + 0x5f, 0x05, 0xe3, 0x25, 0x4c, 0x3e, 0xf2, 0x83, 0xe7, 0xbc, 0x20, 0xe6, 0x61, 0xb7, 0xa3, 0xce, + 0x72, 0xb8, 0xc7, 0x1d, 0x1a, 0x8a, 0x21, 0xf0, 0x26, 0x18, 0x61, 0x95, 0x83, 0x6f, 0x91, 0x70, + 0x43, 0xf1, 0x52, 0xc1, 0x9c, 0xb0, 0x00, 0x66, 0xd7, 0x70, 0xd3, 0x39, 0xb6, 0x1c, 0x82, 0xbd, + 0xfa, 0xf1, 0x56, 0xc8, 0xaa, 0xd4, 0x8c, 0x78, 0x2d, 0x34, 0xa8, 0x5f, 0x6f, 0x72, 0x80, 0x7e, + 0x10, 0x6a, 0x28, 0x45, 0x81, 0xdf, 0x06, 0x72, 0xd2, 0x82, 0x0e, 0x59, 0xbd, 0x9a, 0x11, 0xeb, + 0x55, 0x5a, 0x46, 0x0f, 0x0e, 0x35, 0x94, 0xe1, 0xc1, 0x0f, 0xc0, 0xe2, 0x76, 0xab, 0xe1, 0x10, + 0xdc, 0x48, 0xc5, 0x35, 0xc3, 0x04, 0x6f, 0x76, 0x3b, 0xaa, 0xca, 0x05, 0xdb, 0x1c, 0xa6, 0x67, + 0xe3, 0xeb, 0xaf, 0x00, 0xdf, 0x00, 0x00, 0xf9, 0x6d, 0xaf, 0x61, 0xb9, 0x07, 0x2e, 0x51, 0x16, + 0x97, 0xa5, 0x95, 0xd1, 0xfc, 0xc5, 0x6e, 0x47, 0x85, 0x5c, 0x2f, 0xa0, 0x3e, 0xbd, 0x49, 0x9d, + 0x1a, 0x12, 0x90, 0x30, 0x0f, 0x66, 0xcd, 0x23, 0x97, 0x94, 0xbd, 0x82, 0x13, 0x62, 0x5a, 0x60, + 0x95, 0x8b, 0x99, 0x6a, 0x74, 0xe4, 0x12, 0xdd, 0xf7, 0x74, 0x5a, 0x94, 0xdb, 0x01, 0xd6, 0x50, + 0x8a, 0x01, 0xdf, 0x06, 0x53, 0xa6, 0xe7, 0xec, 0x34, 0x71, 0xa5, 0x15, 0xf8, 0xbb, 0xca, 0x25, + 0x26, 0x70, 0xa9, 0xdb, 0x51, 0x17, 0x22, 0x01, 0xe6, 0xd4, 0x5b, 0xd4, 0x4b, 0xab, 0x6a, 0x0f, + 0x4b, 0x2b, 0x32, 0x95, 0x61, 0x8b, 0xd9, 0x0a, 0x15, 0x95, 
0xed, 0x83, 0x70, 0x4c, 0xeb, 0xac, + 0x88, 0xb3, 0x4d, 0xa0, 0x8b, 0x17, 0xc1, 0x74, 0x5a, 0x3a, 0xac, 0xee, 0xb7, 0x77, 0x77, 0x9b, + 0x58, 0x59, 0x4e, 0x4f, 0xcb, 0xb8, 0x21, 0xf7, 0x46, 0xd4, 0x08, 0x0b, 0x5f, 0x06, 0xa3, 0x74, + 0x18, 0x2a, 0x37, 0x68, 0x4b, 0x9b, 0x97, 0xbb, 0x1d, 0x75, 0xba, 0x47, 0x0a, 0x35, 0xc4, 0xdd, + 0x70, 0x53, 0xe8, 0x56, 0x0a, 0xfe, 0xc1, 0x81, 0xe3, 0x35, 0x42, 0x45, 0x63, 0x9c, 0xeb, 0xdd, + 0x8e, 0x7a, 0x39, 0xdd, 0xad, 0xd4, 0x23, 0x8c, 0xd8, 0xac, 0xc4, 0x3c, 0x7a, 0x1c, 0x51, 0xdb, + 0xf3, 0x70, 0x70, 0xd2, 0x70, 0xdd, 0x4e, 0x57, 0xa9, 0x80, 0xf9, 0xc5, 0x96, 0x2b, 0x45, 0x81, + 0x45, 0x20, 0x9b, 0x47, 0x04, 0x07, 0x9e, 0xd3, 0x3c, 0x91, 0x59, 0x65, 0x32, 0x42, 0x40, 0x38, + 0x42, 0x88, 0x42, 0x19, 0x1a, 0xbc, 0x07, 0x26, 0xab, 0x24, 0xc0, 0x61, 0x88, 0x83, 0x50, 0xc1, + 0x6c, 0x51, 0x17, 0xba, 0x1d, 0x55, 0x8e, 0x2e, 0x88, 0xd8, 0xa5, 0xa1, 0x1e, 0x0c, 0xde, 0x05, + 0x13, 0x85, 0x7d, 0x5c, 0x7f, 0x4e, 0x29, 0xbb, 0x8c, 0x22, 0x3c, 0xd5, 0xf5, 0xc8, 0xa3, 0xa1, + 0x13, 0x10, 0x2d, 0x89, 0x9c, 0xbd, 0x89, 0x8f, 0x59, 0x1f, 0xcf, 0x9a, 0xa6, 0x51, 0xf1, 0x7c, + 0xf1, 0x99, 0xd8, 0x55, 0x1b, 0xba, 0x1f, 0x63, 0x0d, 0x25, 0x19, 0xf0, 0x31, 0x80, 0x09, 0x83, + 0xe5, 0x04, 0x7b, 0x98, 0x77, 0x4d, 0xa3, 0xf9, 0xe5, 0x6e, 0x47, 0xbd, 0xd6, 0x57, 0x47, 0x6f, + 0x52, 0x9c, 0x86, 0xfa, 0x90, 0xe1, 0x53, 0x70, 0xa1, 0x67, 0x6d, 0xef, 0xee, 0xba, 0x47, 0xc8, + 0xf1, 0xf6, 0xb0, 0xf2, 0x39, 0x17, 0xd5, 0xba, 0x1d, 0x75, 0x29, 0x2b, 0xca, 0x80, 0x7a, 0x40, + 0x91, 0x1a, 0xea, 0x2b, 0x00, 0x1d, 0x70, 0xa9, 0x9f, 0xdd, 0x3e, 0xf2, 0x94, 0x2f, 0xb8, 0xf6, + 0xcb, 0xdd, 0x8e, 0xaa, 0x9d, 0xaa, 0xad, 0x93, 0x23, 0x4f, 0x43, 0x83, 0x74, 0xe0, 0x06, 0x98, + 0x3b, 0x71, 0xd9, 0x47, 0x5e, 0xb9, 0x15, 0x2a, 0x5f, 0x72, 0x69, 0xe1, 0x04, 0x08, 0xd2, 0xe4, + 0xc8, 0xd3, 0xfd, 0x56, 0xa8, 0xa1, 0x34, 0x0d, 0xbe, 0x1f, 0xe7, 0x86, 0x17, 0xf7, 0x90, 0x77, + 0x90, 0xa3, 0x62, 0x01, 0x8e, 0x74, 0x78, 0x5b, 0x10, 0x9e, 0xa4, 0x26, 0x22, 0xc0, 0xd7, 0xe3, + 0x23, 0xf4, 0xb8, 0x52, 0xe5, 0xbd, 0xe3, 0xa8, 0xf8, 0x0e, 0x10, 0xb1, 0x3f, 0x6c, 0xf5, 0x0e, + 0xd1, 0xe3, 0x4a, 0x55, 0xfb, 0x66, 0x86, 0x77, 0x9b, 0xf4, 0x16, 0xef, 0xbd, 0x7e, 0x8a, 0xb7, + 0xb8, 0xe7, 0x1c, 0x60, 0x0d, 0x31, 0xa7, 0x58, 0x47, 0x86, 0xcf, 0x51, 0x47, 0x56, 0xc1, 0xd8, + 0x53, 0xc3, 0xa2, 0xe8, 0x5c, 0xba, 0x8c, 0x7c, 0xe4, 0x34, 0x39, 0x38, 0x42, 0xc0, 0x32, 0x58, + 0xd8, 0xc0, 0x4e, 0x40, 0x76, 0xb0, 0x43, 0x8a, 0x1e, 0xc1, 0xc1, 0xa1, 0xd3, 0x8c, 0xaa, 0x44, + 0x4e, 0xdc, 0xcd, 0xfd, 0x18, 0xa4, 0xbb, 0x11, 0x4a, 0x43, 0xfd, 0x98, 0xb0, 0x08, 0xe6, 0xcd, + 0x26, 0xae, 0xd3, 0x17, 0x78, 0xdb, 0x3d, 0xc0, 0x7e, 0x9b, 0x6c, 0x85, 0xac, 0x5a, 0xe4, 0xc4, + 0xa7, 0x1c, 0x47, 0x10, 0x9d, 0x70, 0x8c, 0x86, 0xb2, 0x2c, 0xfa, 0xa0, 0x5b, 0x6e, 0x48, 0xb0, + 0x27, 0xbc, 0x80, 0x2f, 0xa6, 0x6f, 0x9e, 0x26, 0x43, 0xc4, 0x2d, 0x7e, 0x3b, 0x68, 0x86, 0x1a, + 0xca, 0xd0, 0x20, 0x02, 0x0b, 0x46, 0xe3, 0x10, 0x07, 0xc4, 0x0d, 0xb1, 0xa0, 0x76, 0x91, 0xa9, + 0x09, 0x0f, 0x90, 0x13, 0x83, 0x92, 0x82, 0xfd, 0xc8, 0xf0, 0xed, 0xb8, 0xd5, 0x35, 0xda, 0xc4, + 0xb7, 0xad, 0x6a, 0x74, 0xeb, 0x0b, 0xb9, 0x71, 0xda, 0xc4, 0xd7, 0x09, 0x15, 0x48, 0x22, 0xe9, + 0x3d, 0xd8, 0x6b, 0xbd, 0x8d, 0x36, 0xd9, 0x57, 0x14, 0xc6, 0x1d, 0xd0, 0xad, 0x3b, 0xed, 0x54, + 0xb7, 0x4e, 0x29, 0xf0, 0x5b, 0xa2, 0xc8, 0xba, 0xdb, 0xc4, 0xca, 0x65, 0x96, 0x6e, 0xe1, 0x06, + 0x63, 0xec, 0x5d, 0x97, 0x5e, 0xfe, 0x29, 0x6c, 0x2f, 0xfa, 0x4d, 0x7c, 0xcc, 0xc8, 0x57, 0xd2, + 0x27, 0x8b, 0x3e, 0x39, 0x9c, 0x9b, 0x44, 0x42, 0x2b, 0xd3, 0x4a, 0x33, 0x81, 0xab, 
0xe9, 0x46, + 0x5f, 0x68, 0xd3, 0xb8, 0x4e, 0x3f, 0x1a, 0xdd, 0x0b, 0x9e, 0x2e, 0xda, 0xc3, 0xb1, 0xac, 0xa8, + 0x2c, 0x2b, 0xc2, 0x5e, 0x44, 0x39, 0x66, 0xbd, 0x1f, 0x4f, 0x48, 0x8a, 0x02, 0x6d, 0x30, 0x7f, + 0x92, 0xa2, 0x13, 0x9d, 0x65, 0xa6, 0x23, 0xdc, 0x36, 0xae, 0xe7, 0x12, 0xd7, 0x69, 0xea, 0xbd, + 0x2c, 0x0b, 0x92, 0x59, 0x01, 0x5a, 0x9a, 0xe9, 0xff, 0x71, 0x7e, 0x6f, 0xb0, 0x1c, 0xa5, 0xfb, + 0xe3, 0x5e, 0x92, 0x45, 0x30, 0x7d, 0x41, 0x65, 0x9d, 0x7a, 0x32, 0xcd, 0x1a, 0x93, 0x10, 0x0e, + 0x1c, 0x6f, 0xef, 0x33, 0xb9, 0xee, 0xc3, 0xa5, 0x1d, 0x6d, 0xdc, 0xfb, 0xb3, 0xfd, 0xbe, 0x39, + 0xf8, 0x55, 0x81, 0x6f, 0x77, 0x02, 0x1e, 0x2f, 0x26, 0x4e, 0xf7, 0x4b, 0x03, 0x9b, 0x7d, 0x4e, + 0x16, 0xc1, 0x70, 0x2b, 0xd5, 0x9c, 0x33, 0x85, 0x5b, 0x67, 0xf5, 0xe6, 0x5c, 0x28, 0xcb, 0xa4, + 0x1d, 0x57, 0x91, 0xa7, 0xa2, 0xd0, 0x6c, 0xb3, 0x2f, 0x77, 0xb7, 0xd3, 0x67, 0x27, 0x4e, 0x55, + 0x9d, 0x03, 0x34, 0x94, 0x62, 0xd0, 0x27, 0x3a, 0x69, 0xa9, 0x12, 0x87, 0xe0, 0xa8, 0x11, 0x10, + 0x36, 0x38, 0x25, 0xa4, 0x87, 0x14, 0xa6, 0xa1, 0x7e, 0xe4, 0xac, 0xa6, 0xed, 0x3f, 0xc7, 0x9e, + 0xf2, 0xca, 0x59, 0x9a, 0x84, 0xc2, 0x32, 0x9a, 0x8c, 0x0c, 0x1f, 0x82, 0x99, 0xf8, 0xf5, 0xa0, + 0xe0, 0xb7, 0x3d, 0xa2, 0xdc, 0x67, 0x77, 0xa1, 0x58, 0x60, 0xe2, 0xf7, 0x90, 0x3a, 0xf5, 0xd3, + 0x02, 0x23, 0xe2, 0xa1, 0x05, 0xe6, 0x1f, 0xb7, 0x7d, 0xe2, 0xe4, 0x9d, 0xfa, 0x73, 0xec, 0x35, + 0xf2, 0xc7, 0x04, 0x87, 0xca, 0xeb, 0x4c, 0x44, 0x68, 0xbf, 0x3f, 0xa4, 0x10, 0x7d, 0x87, 0x63, + 0xf4, 0x1d, 0x0a, 0xd2, 0x50, 0x96, 0x48, 0x4b, 0x49, 0x25, 0xc0, 0x4f, 0x7c, 0x82, 0x95, 0x87, + 0xe9, 0xeb, 0xaa, 0x15, 0x60, 0xfd, 0xd0, 0xa7, 0xbb, 0x13, 0x63, 0xc4, 0x1d, 0xf1, 0x83, 0xa0, + 0xdd, 0x22, 0xac, 0xab, 0x51, 0xde, 0x4f, 0x1f, 0xe3, 0x93, 0x1d, 0xe1, 0x28, 0x9d, 0xf5, 0x41, + 0xc2, 0x8e, 0x08, 0xe4, 0xd5, 0x9f, 0xe6, 0x84, 0xef, 0xc0, 0x70, 0x0e, 0x4c, 0x95, 0xca, 0x76, + 0xad, 0x6a, 0x1b, 0xc8, 0x36, 0xd7, 0xe4, 0x21, 0x78, 0x11, 0xc0, 0x62, 0xa9, 0x68, 0x17, 0x0d, + 0x8b, 0x1b, 0x6b, 0xa6, 0x5d, 0x58, 0x93, 0x01, 0x94, 0xc1, 0x34, 0x32, 0x05, 0xcb, 0x14, 0xb5, + 0x54, 0x8b, 0x8f, 0x6c, 0x13, 0x6d, 0x71, 0xcb, 0x05, 0xb8, 0x0c, 0xae, 0x55, 0x8b, 0x8f, 0x1e, + 0x6f, 0x17, 0x39, 0xa6, 0x66, 0x94, 0xd6, 0x6a, 0xc8, 0xdc, 0x2a, 0x3f, 0x31, 0x6b, 0x6b, 0x86, + 0x6d, 0xc8, 0x8b, 0x70, 0x1e, 0xcc, 0x54, 0x8d, 0x27, 0x66, 0xad, 0x5a, 0x32, 0x2a, 0xd5, 0x8d, + 0xb2, 0x2d, 0x2f, 0xc1, 0x1b, 0xe0, 0x3a, 0x15, 0x2e, 0x23, 0xb3, 0x16, 0x4f, 0xb0, 0x8e, 0xca, + 0x5b, 0x3d, 0x88, 0x0a, 0x2f, 0x83, 0xc5, 0xfe, 0xae, 0x65, 0xca, 0xce, 0x4c, 0x69, 0xa0, 0xc2, + 0x46, 0x31, 0x9e, 0x73, 0x05, 0xde, 0x05, 0xaf, 0x9c, 0x16, 0x15, 0x1b, 0x57, 0xed, 0x72, 0xa5, + 0x66, 0x3c, 0x32, 0x4b, 0xb6, 0x7c, 0x1b, 0x5e, 0x07, 0x97, 0xf3, 0x96, 0x51, 0xd8, 0xdc, 0x28, + 0x5b, 0x66, 0xad, 0x62, 0x9a, 0xa8, 0x56, 0x29, 0x23, 0xbb, 0x66, 0x3f, 0xab, 0xa1, 0x67, 0x72, + 0x03, 0xaa, 0xe0, 0xea, 0x76, 0x69, 0x30, 0x00, 0xc3, 0x2b, 0x60, 0x71, 0xcd, 0xb4, 0x8c, 0x0f, + 0x32, 0xae, 0x17, 0x12, 0xbc, 0x06, 0x2e, 0x6d, 0x97, 0xfa, 0x7b, 0x3f, 0x95, 0x56, 0xff, 0x0e, + 0xc0, 0x08, 0xed, 0xfb, 0xa1, 0x02, 0x2e, 0xc4, 0x7b, 0x5b, 0x2e, 0x99, 0xb5, 0xf5, 0xb2, 0x65, + 0x95, 0x9f, 0x9a, 0x48, 0x1e, 0x8a, 0x56, 0x93, 0xf1, 0xd4, 0xb6, 0x4b, 0x76, 0xd1, 0xaa, 0xd9, + 0xa8, 0xf8, 0xe8, 0x91, 0x89, 0x7a, 0x3b, 0x24, 0x41, 0x08, 0x66, 0x63, 0x82, 0x65, 0x1a, 0x6b, + 0x26, 0x92, 0x87, 0xe1, 0x6d, 0x70, 0x2b, 0x69, 0x1b, 0x44, 0xcf, 0x89, 0xf4, 0xc7, 0xdb, 0x65, + 0xb4, 0xbd, 0x25, 0x8f, 0xd0, 0x43, 0x13, 0xdb, 0x0c, 0xcb, 0x92, 0x47, 0xe1, 0x4d, 0xa0, 0xc6, + 0x5b, 0x2c, 
0xec, 0x6e, 0x22, 0x72, 0x00, 0x1f, 0x80, 0x37, 0xce, 0x00, 0x0d, 0x8a, 0x62, 0x8a, + 0xa6, 0xa4, 0x0f, 0x37, 0x5a, 0xcf, 0x34, 0x7c, 0x1d, 0xbc, 0x36, 0xd0, 0x3d, 0x48, 0x74, 0x06, + 0xae, 0x83, 0x7c, 0x1f, 0x16, 0x5f, 0x65, 0x64, 0xe1, 0xe7, 0x32, 0x12, 0x8a, 0xa9, 0xd1, 0x21, + 0x2c, 0x20, 0xc3, 0x2e, 0x6c, 0xc8, 0xb3, 0x70, 0x15, 0xbc, 0x3c, 0xf0, 0x38, 0x24, 0x37, 0xa1, + 0x01, 0x0d, 0xf0, 0xee, 0xf9, 0xb0, 0x83, 0xc2, 0xc6, 0xf0, 0x25, 0xb0, 0x3c, 0x58, 0x22, 0xda, + 0x92, 0x5d, 0xf8, 0x0e, 0x78, 0xf3, 0x2c, 0xd4, 0xa0, 0x29, 0xf6, 0x4e, 0x9f, 0x22, 0x3a, 0x06, + 0xfb, 0xf4, 0xd9, 0x1b, 0x8c, 0xa2, 0x07, 0xc3, 0x85, 0xff, 0x07, 0xb4, 0xbe, 0x87, 0x3d, 0xb9, + 0x2d, 0x2f, 0x24, 0x78, 0x07, 0xdc, 0x46, 0x46, 0x69, 0xad, 0xbc, 0x55, 0x3b, 0x07, 0xfe, 0x53, + 0x09, 0xbe, 0x07, 0xde, 0x3e, 0x1b, 0x38, 0x68, 0x81, 0x9f, 0x49, 0xd0, 0x04, 0xef, 0x9f, 0x7b, + 0xbe, 0x41, 0x32, 0x9f, 0x4b, 0xf0, 0x06, 0xb8, 0xd6, 0x9f, 0x1f, 0xe5, 0xe1, 0x0b, 0x09, 0xae, + 0x80, 0x9b, 0xa7, 0xce, 0x14, 0x21, 0xbf, 0x94, 0xe0, 0x5b, 0xe0, 0xfe, 0x69, 0x90, 0x41, 0x61, + 0xfc, 0x5a, 0x82, 0x0f, 0xc1, 0x83, 0x73, 0xcc, 0x31, 0x48, 0xe0, 0x37, 0xa7, 0xac, 0x23, 0x4a, + 0xf6, 0x57, 0x67, 0xaf, 0x23, 0x42, 0xfe, 0x56, 0x82, 0x4b, 0xe0, 0x72, 0x7f, 0x08, 0x3d, 0x13, + 0xbf, 0x93, 0xe0, 0x2d, 0xb0, 0x7c, 0xaa, 0x12, 0x85, 0xfd, 0x5e, 0x82, 0x0a, 0x58, 0x28, 0x95, + 0x6b, 0xeb, 0x46, 0xd1, 0xaa, 0x3d, 0x2d, 0xda, 0x1b, 0xb5, 0xaa, 0x8d, 0xcc, 0x6a, 0x55, 0xfe, + 0xc5, 0x30, 0x0d, 0x25, 0xe1, 0x29, 0x95, 0x23, 0x67, 0x6d, 0xbd, 0x8c, 0x6a, 0x56, 0xf1, 0x89, + 0x59, 0xa2, 0xc8, 0x4f, 0x86, 0xe1, 0x1c, 0x00, 0x14, 0x56, 0x29, 0x17, 0x4b, 0x76, 0x55, 0xfe, + 0x6e, 0x0e, 0xce, 0x80, 0x09, 0xf3, 0x99, 0x6d, 0xa2, 0x92, 0x61, 0xc9, 0xff, 0xc8, 0xad, 0x1e, + 0x80, 0x89, 0xf8, 0xd3, 0x02, 0x1c, 0x03, 0xc3, 0x9b, 0x4f, 0xe4, 0x21, 0x38, 0x09, 0x46, 0x2d, + 0xd3, 0xa8, 0x9a, 0xb2, 0x04, 0x17, 0xc0, 0x9c, 0x69, 0x99, 0x05, 0xbb, 0x58, 0x2e, 0xd5, 0xd0, + 0x76, 0xa9, 0xc4, 0x2e, 0x4f, 0x19, 0x4c, 0x3f, 0xa5, 0x4f, 0x7e, 0x6c, 0xc9, 0xc1, 0x45, 0x30, + 0x6f, 0x95, 0x0b, 0x9b, 0x35, 0x64, 0x14, 0x4c, 0x14, 0x9b, 0x47, 0x28, 0x90, 0x09, 0xc5, 0x96, + 0xd1, 0xd5, 0x3c, 0x18, 0x8f, 0xbe, 0x4b, 0xc0, 0x29, 0x30, 0xbe, 0xf9, 0xa4, 0xb6, 0x61, 0x54, + 0x37, 0xe4, 0xa1, 0x1e, 0xd2, 0x7c, 0x56, 0x29, 0x22, 0x3a, 0x33, 0x00, 0x63, 0x27, 0x13, 0x4e, + 0x83, 0x89, 0x52, 0xb9, 0x56, 0xd8, 0x30, 0x0b, 0x9b, 0x72, 0xee, 0xde, 0x43, 0x30, 0x69, 0x07, + 0x8e, 0x17, 0xb6, 0xfc, 0x80, 0xc0, 0x7b, 0xe2, 0x60, 0x36, 0xfa, 0x3a, 0x1a, 0xfd, 0xe0, 0x7b, + 0x65, 0xee, 0x64, 0xcc, 0x7f, 0x0b, 0xd4, 0x86, 0x56, 0xa4, 0xd7, 0xa4, 0xfc, 0x85, 0x17, 0x7f, + 0x59, 0x1a, 0x7a, 0xf1, 0xf5, 0x92, 0xf4, 0xd5, 0xd7, 0x4b, 0xd2, 0x9f, 0xbf, 0x5e, 0x92, 0x7e, + 0xf2, 0xd7, 0xa5, 0xa1, 0x9d, 0x31, 0xf6, 0x83, 0xf1, 0xfd, 0xff, 0x05, 0x00, 0x00, 0xff, 0xff, + 0x5c, 0x9f, 0x8c, 0x37, 0x79, 0x1e, 0x00, 0x00, } diff --git a/functional/rpcpb/rpc.proto b/functional/rpcpb/rpc.proto index 4e3b4a9f175..c7f6ea00386 100644 --- a/functional/rpcpb/rpc.proto +++ b/functional/rpcpb/rpc.proto @@ -16,11 +16,28 @@ message Request { Tester Tester = 3; } +// SnapshotInfo contains SAVE_SNAPSHOT request results. 
+message SnapshotInfo { + string MemberName = 1; + repeated string MemberClientURLs = 2; + string SnapshotPath = 3; + string SnapshotFileSize = 4; + string SnapshotTotalSize = 5; + int64 SnapshotTotalKey = 6; + int64 SnapshotHash = 7; + int64 SnapshotRevision = 8; + string Took = 9; +} + message Response { bool Success = 1; string Status = 2; + // Member contains the same Member object from tester request. Member Member = 3; + + // SnapshotInfo contains SAVE_SNAPSHOT request results. + SnapshotInfo SnapshotInfo = 4; } service Transport { @@ -30,7 +47,6 @@ service Transport { message Member { // EtcdExecPath is the executable etcd binary path in agent server. string EtcdExecPath = 1 [(gogoproto.moretags) = "yaml:\"etcd-exec-path\""]; - // TODO: support embedded etcd // AgentAddr is the agent HTTP server address. @@ -54,6 +70,9 @@ message Member { string EtcdClientEndpoint = 301 [(gogoproto.moretags) = "yaml:\"etcd-client-endpoint\""]; // Etcd defines etcd binary configuration flags. Etcd Etcd = 302 [(gogoproto.moretags) = "yaml:\"etcd\""]; + // EtcdOnSnapshotRestore defines one-time use configuration during etcd + // snapshot recovery process. + Etcd EtcdOnSnapshotRestore = 303; // ClientCertData contains cert file contents from this member's etcd server. string ClientCertData = 401 [(gogoproto.moretags) = "yaml:\"client-cert-data\""]; @@ -74,6 +93,11 @@ message Member { // PeerTrustedCAData contains trusted CA file contents from this member's etcd server. string PeerTrustedCAData = 505 [(gogoproto.moretags) = "yaml:\"peer-trusted-ca-data\""]; string PeerTrustedCAPath = 506 [(gogoproto.moretags) = "yaml:\"peer-trusted-ca-path\""]; + + // SnapshotPath is the snapshot file path to store or restore from. + string SnapshotPath = 601 [(gogoproto.moretags) = "yaml:\"snapshot-path\""]; + // SnapshotInfo contains last SAVE_SNAPSHOT request results. + SnapshotInfo SnapshotInfo = 602; } message Tester { @@ -93,22 +117,21 @@ message Tester { // RoundLimit is the limit of rounds to run failure set (-1 to run without limits). int32 RoundLimit = 21 [(gogoproto.moretags) = "yaml:\"round-limit\""]; - // ExitOnFailure is true, then exit tester on first failure. - bool ExitOnFailure = 22 [(gogoproto.moretags) = "yaml:\"exit-on-failure\""]; - // ConsistencyCheck is true to check consistency (revision, hash). - bool ConsistencyCheck = 23 [(gogoproto.moretags) = "yaml:\"consistency-check\""]; + // ExitOnCaseFail is true, then exit tester on first failure. + bool ExitOnCaseFail = 22 [(gogoproto.moretags) = "yaml:\"exit-on-failure\""]; // EnablePprof is true to enable profiler. - bool EnablePprof = 24 [(gogoproto.moretags) = "yaml:\"enable-pprof\""]; + bool EnablePprof = 23 [(gogoproto.moretags) = "yaml:\"enable-pprof\""]; - // FailureDelayMs is the delay duration after failure is injected. + // CaseDelayMs is the delay duration after failure is injected. // Useful when triggering snapshot or no-op failure cases. - uint32 FailureDelayMs = 31 [(gogoproto.moretags) = "yaml:\"failure-delay-ms\""]; - // FailureShuffle is true to randomize failure injecting order. - bool FailureShuffle = 32 [(gogoproto.moretags) = "yaml:\"failure-shuffle\""]; - // FailureCases is the selected test cases to schedule. + uint32 CaseDelayMs = 31 [(gogoproto.moretags) = "yaml:\"case-delay-ms\""]; + // CaseShuffle is true to randomize failure injecting order. + bool CaseShuffle = 32 [(gogoproto.moretags) = "yaml:\"case-shuffle\""]; + // Cases is the selected test cases to schedule. // If empty, run all failure cases. 
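For orientation, a minimal Go sketch of consuming these new messages on the tester side; the helper name is hypothetical, and it assumes only the rpcpb bindings generated from this file:

package tester

import (
	"errors"
	"fmt"

	"github.com/coreos/etcd/functional/rpcpb"

	"go.uber.org/zap"
)

// logSnapshotInfo is a hypothetical helper that reads the SnapshotInfo
// attached to a SAVE_SNAPSHOT response.
func logSnapshotInfo(lg *zap.Logger, resp *rpcpb.Response) error {
	if resp == nil {
		return errors.New("nil response")
	}
	if !resp.Success {
		return fmt.Errorf("SAVE_SNAPSHOT failed: %s", resp.Status)
	}
	si := resp.SnapshotInfo
	if si == nil {
		return errors.New("response carries no SnapshotInfo")
	}
	lg.Info("snapshot saved",
		zap.String("member-name", si.MemberName),
		zap.Strings("member-client-urls", si.MemberClientURLs),
		zap.String("path", si.SnapshotPath),
		zap.String("file-size", si.SnapshotFileSize),
		zap.Int64("revision", si.SnapshotRevision),
		zap.Int64("total-keys", si.SnapshotTotalKey),
		zap.String("took", si.Took),
	)
	return nil
}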
- repeated string FailureCases = 33 [(gogoproto.moretags) = "yaml:\"failure-cases\""]; - // Failpoinommands is the list of "gofail" commands (e.g. panic("etcd-tester"),1*sleep(1000) + repeated string Cases = 33 [(gogoproto.moretags) = "yaml:\"cases\""]; + // FailpointCommands is the list of "gofail" commands + // (e.g. panic("etcd-tester"),1*sleep(1000)). repeated string FailpointCommands = 34 [(gogoproto.moretags) = "yaml:\"failpoint-commands\""]; // RunnerExecPath is a path of etcd-runner binary. @@ -116,27 +139,32 @@ message Tester { // ExternalExecPath is a path of script for enabling/disabling an external fault injector. string ExternalExecPath = 42 [(gogoproto.moretags) = "yaml:\"external-exec-path\""]; - // StressTypes is the list of stresser names: - // keys, lease, nop, election-runner, watch-runner, lock-racer-runner, lease-runner. - repeated string StressTypes = 101 [(gogoproto.moretags) = "yaml:\"stress-types\""]; + // Stressers is the list of stresser types: + // KV, LEASE, ELECTION_RUNNER, WATCH_RUNNER, LOCK_RACER_RUNNER, LEASE_RUNNER. + repeated string Stressers = 101 [(gogoproto.moretags) = "yaml:\"stressers\""]; + // Checkers is the list of consistency checker types: + // KV_HASH, LEASE_EXPIRE, NO_CHECK, RUNNER. + // Leave empty to skip consistency checks. + repeated string Checkers = 102 [(gogoproto.moretags) = "yaml:\"checkers\""]; + // StressKeySize is the size of each small key written into etcd. - int32 StressKeySize = 102 [(gogoproto.moretags) = "yaml:\"stress-key-size\""]; + int32 StressKeySize = 201 [(gogoproto.moretags) = "yaml:\"stress-key-size\""]; // StressKeySizeLarge is the size of each large key written into etcd. - int32 StressKeySizeLarge = 103 [(gogoproto.moretags) = "yaml:\"stress-key-size-large\""]; + int32 StressKeySizeLarge = 202 [(gogoproto.moretags) = "yaml:\"stress-key-size-large\""]; // StressKeySuffixRange is the count of key range written into etcd. // Stress keys are created with "fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange)". - int32 StressKeySuffixRange = 104 [(gogoproto.moretags) = "yaml:\"stress-key-suffix-range\""]; + int32 StressKeySuffixRange = 203 [(gogoproto.moretags) = "yaml:\"stress-key-suffix-range\""]; // StressKeySuffixRangeTxn is the count of key range written into etcd txn (max 100). // Stress keys are created with "fmt.Sprintf("/k%03d", i)". - int32 StressKeySuffixRangeTxn = 105 [(gogoproto.moretags) = "yaml:\"stress-key-suffix-range-txn\""]; + int32 StressKeySuffixRangeTxn = 204 [(gogoproto.moretags) = "yaml:\"stress-key-suffix-range-txn\""]; // StressKeyTxnOps is the number of operations per a transaction (max 64). - int32 StressKeyTxnOps = 106 [(gogoproto.moretags) = "yaml:\"stress-key-txn-ops\""]; + int32 StressKeyTxnOps = 205 [(gogoproto.moretags) = "yaml:\"stress-key-txn-ops\""]; // StressClients is the number of concurrent stressing clients // with "one" shared TCP connection. - int32 StressClients = 201 [(gogoproto.moretags) = "yaml:\"stress-clients\""]; + int32 StressClients = 301 [(gogoproto.moretags) = "yaml:\"stress-clients\""]; // StressQPS is the maximum number of stresser requests per second. - int32 StressQPS = 202 [(gogoproto.moretags) = "yaml:\"stress-qps\""]; + int32 StressQPS = 302 [(gogoproto.moretags) = "yaml:\"stress-qps\""]; } message Etcd { @@ -194,12 +222,24 @@ enum Operation { // directories to simulate destroying the whole machine.
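Since the Tester fields above were renamed and renumbered in one sweep, a hedged sketch of what a configuration literal looks like with the new names; every value is illustrative only, and the field names mirror the yaml tags above:

package tester

import "github.com/coreos/etcd/functional/rpcpb"

// exampleTester shows the renamed knobs; the values here are made up.
func exampleTester() *rpcpb.Tester {
	return &rpcpb.Tester{
		RoundLimit:     1,
		ExitOnCaseFail: true, // was ExitOnFailure
		CaseDelayMs:    7000, // was FailureDelayMs
		CaseShuffle:    true, // was FailureShuffle
		Cases:          []string{"SIGTERM_ONE_FOLLOWER"}, // was FailureCases
		Stressers:      []string{"KV", "LEASE"},          // was StressTypes
		Checkers:       []string{"KV_HASH", "LEASE_EXPIRE"},
	}
}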
SIGQUIT_ETCD_AND_REMOVE_DATA = 21; + // SAVE_SNAPSHOT is sent to trigger the local member to download its + // snapshot onto its local disk, at the path specified by the tester. + SAVE_SNAPSHOT = 30; + // RESTORE_RESTART_FROM_SNAPSHOT is sent to trigger the local member to + // restore a cluster from an existing snapshot on disk, and restart + // an etcd instance from the recovered data. + RESTORE_RESTART_FROM_SNAPSHOT = 31; + // RESTART_FROM_SNAPSHOT is sent to trigger the local member to restart + // and join an existing cluster that has been recovered from a snapshot. + // The local member joins this cluster with fresh data. + RESTART_FROM_SNAPSHOT = 32; + + // SIGQUIT_ETCD_AND_ARCHIVE_DATA is sent when a consistency check fails, + // in order to archive the etcd data directories. - SIGQUIT_ETCD_AND_ARCHIVE_DATA = 30; + SIGQUIT_ETCD_AND_ARCHIVE_DATA = 40; // SIGQUIT_ETCD_AND_REMOVE_DATA_AND_STOP_AGENT destroys etcd process, // etcd data, and agent server. - SIGQUIT_ETCD_AND_REMOVE_DATA_AND_STOP_AGENT = 31; + SIGQUIT_ETCD_AND_REMOVE_DATA_AND_STOP_AGENT = 41; // BLACKHOLE_PEER_PORT_TX_RX drops all outgoing/incoming packets from/to // the peer port on target member's peer port. @@ -214,12 +254,12 @@ enum Operation { UNDELAY_PEER_PORT_TX_RX = 201; } -// FailureCase defines various system faults in distributed systems, +// Case defines various system faults or test cases in distributed systems, // in order to verify correct behavior of etcd servers and clients. -enum FailureCase { +enum Case { // SIGTERM_ONE_FOLLOWER stops a randomly chosen follower (non-leader) // but does not delete its data directories on disk for next restart. - // It waits "failure-delay-ms" before recovering this failure. + // It waits "delay-ms" before recovering this failure. // The expected behavior is that the follower comes back online // and rejoins the cluster, and then each member continues to process // client requests ('Put' request that requires Raft consensus). @@ -237,9 +277,8 @@ enum FailureCase { SIGTERM_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT = 1; // SIGTERM_LEADER stops the active leader node but does not delete its - // data directories on disk for next restart. Then it waits - // "failure-delay-ms" before recovering this failure, in order to - // trigger election timeouts. + // data directories on disk for next restart. Then it waits "delay-ms" + // before recovering this failure, in order to trigger election timeouts. // The expected behavior is that a new leader gets elected, and the // old leader comes back online and rejoins the cluster as a follower. // As always, after recovery, each member must be able to process @@ -259,16 +298,15 @@ enum FailureCase { // SIGTERM_QUORUM stops majority number of nodes to make the whole cluster // inoperable but does not delete data directories on stopped nodes - // for next restart. And it waits "failure-delay-ms" before recovering - // this failure. + // for next restart. And it waits "delay-ms" before recovering the failure. // The expected behavior is that nodes come back online, thus cluster // comes back operative as well. As always, after recovery, each member // must be able to process client requests. SIGTERM_QUORUM = 4; // SIGTERM_ALL stops the whole cluster but does not delete data directories - // on disk for next restart. And it waits "failure-delay-ms" before - // recovering this failure. + // on disk for next restart. And it waits "delay-ms" before recovering + // this failure. // The expected behavior is that nodes come back online, thus cluster // comes back operative as well.
As always, after recovery, each member // must be able to process client requests. @@ -278,7 +316,7 @@ enum FailureCase { // (non-leader), deletes its data directories on disk, and removes // this member from cluster (membership reconfiguration). On recovery, // tester adds a new member, and this member joins the existing cluster - // with fresh data. It waits "failure-delay-ms" before recovering this + // with fresh data. It waits "delay-ms" before recovering this // failure. This simulates destroying one follower machine, where operator // needs to add a new member from a fresh machine. // The expected behavior is that a new member joins the existing cluster, @@ -301,7 +339,7 @@ enum FailureCase { // SIGQUIT_AND_REMOVE_LEADER stops the active leader node, deletes its // data directories on disk, and removes this member from cluster. // On recovery, tester adds a new member, and this member joins the - // existing cluster with fresh data. It waits "failure-delay-ms" before + // existing cluster with fresh data. It waits "delay-ms" before // recovering this failure. This simulates destroying a leader machine, // where operator needs to add a new member from a fresh machine. // The expected behavior is that a new member joins the existing cluster, @@ -322,9 +360,36 @@ enum FailureCase { // member must be able to process client requests. SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT = 13; + // SIGQUIT_AND_REMOVE_QUORUM_AND_RESTORE_LEADER_SNAPSHOT_FROM_SCRATCH first + // stops a majority of nodes and deletes the data directories on those + // quorum nodes, to make the whole cluster inoperable. Now that the quorum + // and their data are totally destroyed, the cluster cannot even remove + // unavailable nodes (e.g. 2 out of 3 are lost, so no leader can be elected). + // Let's assume a 3-node cluster of nodes A, B, and C. One day, nodes A and + // B are destroyed and all their data are gone. The only viable solution is + // to recover from C's latest snapshot. + // + // To simulate: + // 1. Assume node C is the current leader with the most up-to-date data. + // 2. Download a snapshot from node C, before destroying nodes A and B. + // 3. Destroy nodes A and B, making the whole cluster inoperable. + // 4. Now node C cannot operate either. + // 5. SIGTERM node C and remove its data directories. + // 6. Restore a new seed member from node C's latest snapshot file. + // 7. Add another member to establish a 2-node cluster. + // 8. Add another member to establish a 3-node cluster. + // 9. Add more members, if any remain. + // + // The expected behavior is that etcd successfully recovers from such a + // disastrous situation, where only 1 node survives out of a 3-node cluster: + // new members join the existing cluster, and the previous data from the + // snapshot are still preserved after the recovery process. As always, after + // recovery, each member must be able to process client requests. + SIGQUIT_AND_REMOVE_QUORUM_AND_RESTORE_LEADER_SNAPSHOT_FROM_SCRATCH = 14; + // BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER drops all outgoing/incoming // packets from/to the peer port on a randomly chosen follower - // (non-leader), and waits for "failure-delay-ms" until recovery. + // (non-leader), and waits for "delay-ms" until recovery. // The expected behavior is that once dropping operation is undone, // each member must be able to process client requests.
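Step 2 of the sequence above (download a snapshot from the surviving leader) is an ordinary snapshot save; a minimal sketch against the public clientv3 API, where the endpoint and file path are examples rather than values from this patch:

package main

import (
	"context"
	"io"
	"os"
	"time"

	"github.com/coreos/etcd/clientv3"
)

// saveSnapshot streams the backend snapshot of one member to disk,
// e.g. saveSnapshot("localhost:2379", "/tmp/example.snapshot.db").
func saveSnapshot(endpoint, path string) error {
	cli, err := clientv3.New(clientv3.Config{
		Endpoints:   []string{endpoint},
		DialTimeout: 5 * time.Second,
	})
	if err != nil {
		return err
	}
	defer cli.Close()

	// Snapshot returns an io.ReadCloser over the member's backend database.
	rc, err := cli.Snapshot(context.Background())
	if err != nil {
		return err
	}
	defer rc.Close()

	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer f.Close()
	_, err = io.Copy(f, rc)
	return err
}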
BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER = 100; @@ -342,7 +407,7 @@ enum FailureCase { // BLACKHOLE_PEER_PORT_TX_RX_LEADER drops all outgoing/incoming packets // from/to the peer port on the active leader (isolated), and waits for - // "failure-delay-ms" until recovery, in order to trigger election timeout. + // "delay-ms" until recovery, in order to trigger election timeout. // The expected behavior is that after election timeout, a new leader gets // elected, and once dropping operation is undone, the old leader comes // back and rejoins the cluster as a follower. As always, after recovery, @@ -362,7 +427,7 @@ enum FailureCase { // BLACKHOLE_PEER_PORT_TX_RX_QUORUM drops all outgoing/incoming packets // from/to the peer ports on majority nodes of cluster, thus losing its - // leader and cluster being inoperable. And it waits for "failure-delay-ms" + // leader and cluster being inoperable. And it waits for "delay-ms" // until recovery. // The expected behavior is that once packet drop operation is undone, // nodes come back online, thus cluster comes back operative. As always, @@ -371,7 +436,7 @@ enum FailureCase { // BLACKHOLE_PEER_PORT_TX_RX_ALL drops all outgoing/incoming packets // from/to the peer ports on all nodes, thus making cluster totally - // inoperable. It waits for "failure-delay-ms" until recovery. + // inoperable. It waits for "delay-ms" until recovery. // The expected behavior is that once packet drop operation is undone, // nodes come back online, thus cluster comes back operative. As always, // after recovery, each member must be able to process client requests. @@ -379,7 +444,7 @@ enum FailureCase { // DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER delays outgoing/incoming packets // from/to the peer port on a randomly chosen follower (non-leader). - // It waits for "failure-delay-ms" until recovery. + // It waits for "delay-ms" until recovery. // The expected behavior is that once packet delay operation is undone, // the follower comes back and tries to catch up with latest changes from // cluster. And as always, after recovery, each member must be able to @@ -388,8 +453,8 @@ enum FailureCase { // RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER delays outgoing/incoming // packets from/to the peer port on a randomly chosen follower - // (non-leader) with a randomized time duration (thus isolated). It waits - // for "failure-delay-ms" until recovery. + // (non-leader) with a randomized time duration (thus isolated). It + // waits for "delay-ms" until recovery. // The expected behavior is that once packet delay operation is undone, // each member must be able to process client requests. RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER = 201; @@ -418,8 +483,8 @@ enum FailureCase { RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT = 203; // DELAY_PEER_PORT_TX_RX_LEADER delays outgoing/incoming packets from/to - // the peer port on the active leader. And waits for "failure-delay-ms" - // until recovery. + // the peer port on the active leader. And waits for "delay-ms" until + // recovery. // The expected behavior is that cluster may elect a new leader, and // once packet delay operation is undone, the (old) leader comes back // and tries to catch up with latest changes from cluster. As always, @@ -428,7 +493,7 @@ enum FailureCase { // RANDOM_DELAY_PEER_PORT_TX_RX_LEADER delays outgoing/incoming packets // from/to the peer port on the active leader with a randomized time - // duration. And waits for "failure-delay-ms" until recovery. + // duration. 
And waits for "delay-ms" until recovery. // The expected behavior is that cluster may elect a new leader, and // once packet delay operation is undone, the (old) leader comes back // and tries to catch up with latest changes from cluster. As always, @@ -462,7 +527,7 @@ enum FailureCase { // DELAY_PEER_PORT_TX_RX_QUORUM delays outgoing/incoming packets from/to // the peer ports on majority nodes of cluster. And it waits for - // "failure-delay-ms" until recovery, likely to trigger election timeouts. + // "delay-ms" until recovery, likely to trigger election timeouts. // The expected behavior is that cluster may elect a new leader, while // quorum of nodes struggle with slow networks, and once delay operation // is undone, nodes come back and cluster comes back operative. As always, @@ -471,8 +536,8 @@ enum FailureCase { // RANDOM_DELAY_PEER_PORT_TX_RX_QUORUM delays outgoing/incoming packets // from/to the peer ports on majority nodes of cluster, with randomized - // time durations. And it waits for "failure-delay-ms" until recovery, - // likely to trigger election timeouts. + // time durations. And it waits for "delay-ms" until recovery, likely + // to trigger election timeouts. // The expected behavior is that cluster may elect a new leader, while // quorum of nodes struggle with slow networks, and once delay operation // is undone, nodes come back and cluster comes back operative. As always, @@ -480,8 +545,8 @@ enum FailureCase { RANDOM_DELAY_PEER_PORT_TX_RX_QUORUM = 209; // DELAY_PEER_PORT_TX_RX_ALL delays outgoing/incoming packets from/to the - // peer ports on all nodes. And it waits for "failure-delay-ms" until - // recovery, likely to trigger election timeouts. + // peer ports on all nodes. And it waits for "delay-ms" until recovery, + // likely to trigger election timeouts. // The expected behavior is that cluster may become totally inoperable, // struggling with slow networks across the whole cluster. Once delay // operation is undone, nodes come back and cluster comes back operative. @@ -491,7 +556,7 @@ enum FailureCase { // RANDOM_DELAY_PEER_PORT_TX_RX_ALL delays outgoing/incoming packets // from/to the peer ports on all nodes, with randomized time durations. - // And it waits for "failure-delay-ms" until recovery, likely to trigger + // And it waits for "delay-ms" until recovery, likely to trigger // election timeouts. // The expected behavior is that cluster may become totally inoperable, // struggling with slow networks across the whole cluster. Once delay @@ -500,14 +565,26 @@ enum FailureCase { // requests. RANDOM_DELAY_PEER_PORT_TX_RX_ALL = 211; - // NO_FAIL_WITH_STRESS runs no-op failure injection that does not do - // anything against cluster for "failure-delay-ms" duration, while - // stressers are still sending requests. + // NO_FAIL_WITH_STRESS stops injecting failures while testing the + // consistency and correctness under pressure loads, for the duration of + // "delay-ms". Goal is to ensure cluster be still making progress + // on recovery, and verify system does not deadlock following a sequence + // of failure injections. + // The expected behavior is that cluster remains fully operative in healthy + // condition. As always, after recovery, each member must be able to process + // client requests. NO_FAIL_WITH_STRESS = 300; - // NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS runs no-op failure injection - // that does not do anything against cluster for "failure-delay-ms" - // duration, while all stressers are stopped. 
+ // NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS neither injects failures nor + // sends stressing client requests to the cluster, for the duration of + // "delay-ms". The goal is to ensure the cluster is still making progress + // on recovery, and to verify that the system does not deadlock following + // a sequence of failure injections. + // The expected behavior is that the cluster remains fully operative in a + // healthy condition, and client requests during the liveness period + // succeed without errors. + // Note: this is how Google Chubby does failure injection testing; see + // https://static.googleusercontent.com/media/research.google.com/en//archive/paxos_made_live.pdf. NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS = 301; // FAILPOINTS injects failpoints to etcd server runtime, triggering panics @@ -518,7 +595,7 @@ enum FailureCase { EXTERNAL = 500; } -enum StressType { +enum Stresser { KV = 0; LEASE = 1; ELECTION_RUNNER = 2; @@ -526,3 +603,10 @@ LOCK_RACER_RUNNER = 4; LEASE_RUNNER = 5; } + +enum Checker { + KV_HASH = 0; + LEASE_EXPIRE = 1; + RUNNER = 2; + NO_CHECK = 3; +} diff --git a/functional/tester/case.go b/functional/tester/case.go new file mode 100644 index 00000000000..26d0ff111b0 --- /dev/null +++ b/functional/tester/case.go @@ -0,0 +1,320 @@ +// Copyright 2018 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tester + +import ( + "fmt" + "math/rand" + "time" + + "github.com/coreos/etcd/functional/rpcpb" + + "go.uber.org/zap" ) + +// Case defines the failure/test injection interface. +// To add a test case: +// 1. implement the "Case" interface +// 2. define the fail-case name in "rpcpb.Case" +type Case interface { + // Inject injects the failure into the testing cluster at the given + // round. When calling the function, the cluster should be healthy. + Inject(clus *Cluster) error + // Recover recovers the injected failure caused by the injection of the + // given round and waits for the recovery of the testing cluster. + Recover(clus *Cluster) error + // Desc returns a description of the failure. + Desc() string + // TestCase returns the "rpcpb.Case" enum type.
+ TestCase() rpcpb.Case +} + +type injectMemberFunc func(*Cluster, int) error +type recoverMemberFunc func(*Cluster, int) error + +type caseByFunc struct { + desc string + rpcpbCase rpcpb.Case + injectMember injectMemberFunc + recoverMember recoverMemberFunc +} + +func (c *caseByFunc) Desc() string { + if c.desc != "" { + return c.desc + } + return c.rpcpbCase.String() +} + +func (c *caseByFunc) TestCase() rpcpb.Case { + return c.rpcpbCase +} + +type caseFollower struct { + caseByFunc + last int + lead int +} + +func (c *caseFollower) updateIndex(clus *Cluster) error { + lead, err := clus.GetLeader() + if err != nil { + return err + } + c.lead = lead + + n := len(clus.Members) + if c.last == -1 { // first run + c.last = clus.rd % n + if c.last == c.lead { + c.last = (c.last + 1) % n + } + } else { + c.last = (c.last + 1) % n + if c.last == c.lead { + c.last = (c.last + 1) % n + } + } + return nil +} + +func (c *caseFollower) Inject(clus *Cluster) error { + if err := c.updateIndex(clus); err != nil { + return err + } + return c.injectMember(clus, c.last) +} + +func (c *caseFollower) Recover(clus *Cluster) error { + return c.recoverMember(clus, c.last) +} + +func (c *caseFollower) Desc() string { + if c.desc != "" { + return c.desc + } + return c.rpcpbCase.String() +} + +func (c *caseFollower) TestCase() rpcpb.Case { + return c.rpcpbCase +} + +type caseLeader struct { + caseByFunc + last int + lead int +} + +func (c *caseLeader) updateIndex(clus *Cluster) error { + lead, err := clus.GetLeader() + if err != nil { + return err + } + c.lead = lead + c.last = lead + return nil +} + +func (c *caseLeader) Inject(clus *Cluster) error { + if err := c.updateIndex(clus); err != nil { + return err + } + return c.injectMember(clus, c.last) +} + +func (c *caseLeader) Recover(clus *Cluster) error { + return c.recoverMember(clus, c.last) +} + +func (c *caseLeader) TestCase() rpcpb.Case { + return c.rpcpbCase +} + +type caseQuorum struct { + caseByFunc + injected map[int]struct{} +} + +func (c *caseQuorum) Inject(clus *Cluster) error { + c.injected = pickQuorum(len(clus.Members)) + for idx := range c.injected { + if err := c.injectMember(clus, idx); err != nil { + return err + } + } + return nil +} + +func (c *caseQuorum) Recover(clus *Cluster) error { + for idx := range c.injected { + if err := c.recoverMember(clus, idx); err != nil { + return err + } + } + return nil +} + +func (c *caseQuorum) Desc() string { + if c.desc != "" { + return c.desc + } + return c.rpcpbCase.String() +} + +func (c *caseQuorum) TestCase() rpcpb.Case { + return c.rpcpbCase +} + +func pickQuorum(size int) (picked map[int]struct{}) { + picked = make(map[int]struct{}) + r := rand.New(rand.NewSource(time.Now().UnixNano())) + quorum := size/2 + 1 + for len(picked) < quorum { + idx := r.Intn(size) + picked[idx] = struct{}{} + } + return picked +} + +type caseAll caseByFunc + +func (c *caseAll) Inject(clus *Cluster) error { + for i := range clus.Members { + if err := c.injectMember(clus, i); err != nil { + return err + } + } + return nil +} + +func (c *caseAll) Recover(clus *Cluster) error { + for i := range clus.Members { + if err := c.recoverMember(clus, i); err != nil { + return err + } + } + return nil +} + +func (c *caseAll) Desc() string { + if c.desc != "" { + return c.desc + } + return c.rpcpbCase.String() +} + +func (c *caseAll) TestCase() rpcpb.Case { + return c.rpcpbCase +} + +// caseUntilSnapshot injects a failure/test and waits for a snapshot event +type caseUntilSnapshot struct { + desc string + rpcpbCase rpcpb.Case + Case 
+} + +// all delay failure cases except the ones failing with latency +// greater than the election timeout (those trigger leader election and +// the cluster keeps operating anyway) +var slowCases = map[rpcpb.Case]bool{ + rpcpb.Case_RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER: true, + rpcpb.Case_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT: true, + rpcpb.Case_RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT: true, + rpcpb.Case_RANDOM_DELAY_PEER_PORT_TX_RX_LEADER: true, + rpcpb.Case_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT: true, + rpcpb.Case_RANDOM_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT: true, + rpcpb.Case_RANDOM_DELAY_PEER_PORT_TX_RX_QUORUM: true, + rpcpb.Case_RANDOM_DELAY_PEER_PORT_TX_RX_ALL: true, +} + +func (c *caseUntilSnapshot) Inject(clus *Cluster) error { + if err := c.Case.Inject(clus); err != nil { + return err + } + + snapshotCount := clus.Members[0].Etcd.SnapshotCount + + now := time.Now() + clus.lg.Info( + "trigger snapshot START", + zap.String("desc", c.Desc()), + zap.Int64("etcd-snapshot-count", snapshotCount), + ) + + // maxRev may fail since the failure was just injected; retry if it fails. + startRev, err := clus.maxRev() + for i := 0; i < 10 && startRev == 0; i++ { + startRev, err = clus.maxRev() + } + if startRev == 0 { + return err + } + lastRev := startRev + + // a healthy cluster accepts at least 1000 req/sec; + // give 3x that time to trigger a snapshot. + retries := int(snapshotCount) / 1000 * 3 + if v, ok := slowCases[c.TestCase()]; v && ok { + // a slow network takes more retries + retries *= 5 + } + + for i := 0; i < retries; i++ { + lastRev, _ = clus.maxRev() + // If the number of proposals committed is bigger than snapshot count, + // a new snapshot should have been created. + diff := lastRev - startRev + if diff > snapshotCount { + clus.lg.Info( + "trigger snapshot PASS", + zap.Int("retries", i), + zap.String("desc", c.Desc()), + zap.Int64("committed-entries", diff), + zap.Int64("etcd-snapshot-count", snapshotCount), + zap.Int64("last-revision", lastRev), + zap.Duration("took", time.Since(now)), + ) + return nil + } + + clus.lg.Info( + "trigger snapshot PROGRESS", + zap.Int("retries", i), + zap.Int64("committed-entries", diff), + zap.Int64("etcd-snapshot-count", snapshotCount), + zap.Int64("last-revision", lastRev), + zap.Duration("took", time.Since(now)), + ) + time.Sleep(time.Second) + } + + return fmt.Errorf("cluster too slow: only %d commits in %d retries", lastRev-startRev, retries) +} + +func (c *caseUntilSnapshot) Desc() string { + if c.desc != "" { + return c.desc + } + if c.rpcpbCase.String() != "" { + return c.rpcpbCase.String() + } + return c.Case.Desc() +} + +func (c *caseUntilSnapshot) TestCase() rpcpb.Case { + return c.rpcpbCase +} diff --git a/functional/tester/failure_case_delay.go b/functional/tester/case_delay.go similarity index 73% rename from functional/tester/failure_case_delay.go rename to functional/tester/case_delay.go index a153b56499f..d06d1d65dc4 100644 --- a/functional/tester/failure_case_delay.go +++ b/functional/tester/case_delay.go @@ -20,22 +20,22 @@ import ( "go.uber.org/zap" ) -type failureDelay struct { - Failure +type caseDelay struct { + Case delayDuration time.Duration } -func (f *failureDelay) Inject(clus *Cluster) error { - if err := f.Failure.Inject(clus); err != nil { +func (c *caseDelay) Inject(clus *Cluster) error { + if err := c.Case.Inject(clus); err != nil { return err } - if f.delayDuration > 0 { + if c.delayDuration > 0 { clus.lg.Info( "wait after inject", - zap.Duration("delay",
f.delayDuration), - zap.String("desc", f.Failure.Desc()), + zap.Duration("delay", c.delayDuration), + zap.String("desc", c.Case.Desc()), ) - time.Sleep(f.delayDuration) + time.Sleep(c.delayDuration) } return nil } diff --git a/functional/tester/failure_case_external.go b/functional/tester/case_external.go similarity index 51% rename from functional/tester/failure_case_external.go rename to functional/tester/case_external.go index 41d779f9f25..79d2a3717e4 100644 --- a/functional/tester/failure_case_external.go +++ b/functional/tester/case_external.go @@ -21,35 +21,35 @@ import ( "github.com/coreos/etcd/functional/rpcpb" ) -type failureExternal struct { - Failure +type caseExternal struct { + Case - desc string - failureCase rpcpb.FailureCase + desc string + rpcpbCase rpcpb.Case scriptPath string } -func (f *failureExternal) Inject(clus *Cluster) error { - return exec.Command(f.scriptPath, "enable", fmt.Sprintf("%d", clus.rd)).Run() +func (c *caseExternal) Inject(clus *Cluster) error { + return exec.Command(c.scriptPath, "enable", fmt.Sprintf("%d", clus.rd)).Run() } -func (f *failureExternal) Recover(clus *Cluster) error { - return exec.Command(f.scriptPath, "disable", fmt.Sprintf("%d", clus.rd)).Run() +func (c *caseExternal) Recover(clus *Cluster) error { + return exec.Command(c.scriptPath, "disable", fmt.Sprintf("%d", clus.rd)).Run() } -func (f *failureExternal) Desc() string { - return f.desc +func (c *caseExternal) Desc() string { + return c.desc } -func (f *failureExternal) FailureCase() rpcpb.FailureCase { - return f.failureCase +func (c *caseExternal) TestCase() rpcpb.Case { + return c.rpcpbCase } -func new_FailureCase_EXTERNAL(scriptPath string) Failure { - return &failureExternal{ - desc: fmt.Sprintf("external fault injector (script: %q)", scriptPath), - failureCase: rpcpb.FailureCase_EXTERNAL, - scriptPath: scriptPath, +func new_Case_EXTERNAL(scriptPath string) Case { + return &caseExternal{ + desc: fmt.Sprintf("external fault injector (script: %q)", scriptPath), + rpcpbCase: rpcpb.Case_EXTERNAL, + scriptPath: scriptPath, } } diff --git a/functional/tester/failure_case_failpoints.go b/functional/tester/case_failpoints.go similarity index 78% rename from functional/tester/failure_case_failpoints.go rename to functional/tester/case_failpoints.go index 0db8b89654e..4d26c8a8dd0 100644 --- a/functional/tester/failure_case_failpoints.go +++ b/functional/tester/case_failpoints.go @@ -32,7 +32,7 @@ type failpointStats struct { var fpStats failpointStats -func failpointFailures(clus *Cluster) (ret []Failure, err error) { +func failpointFailures(clus *Cluster) (ret []Case, err error) { var fps []string fps, err = failpointPaths(clus.Members[0].FailpointHTTPAddr) if err != nil { @@ -44,21 +44,21 @@ func failpointFailures(clus *Cluster) (ret []Failure, err error) { continue } - fpFails := failuresFromFailpoint(fp, clus.Tester.FailpointCommands) + fpFails := casesFromFailpoint(fp, clus.Tester.FailpointCommands) // wrap in delays so failpoint has time to trigger for i, fpf := range fpFails { if strings.Contains(fp, "Snap") { // hack to trigger snapshot failpoints - fpFails[i] = &failureUntilSnapshot{ - desc: fpf.Desc(), - failureCase: rpcpb.FailureCase_FAILPOINTS, - Failure: fpf, + fpFails[i] = &caseUntilSnapshot{ + desc: fpf.Desc(), + rpcpbCase: rpcpb.Case_FAILPOINTS, + Case: fpf, } } else { - fpFails[i] = &failureDelay{ - Failure: fpf, - delayDuration: clus.GetFailureDelayDuration(), + fpFails[i] = &caseDelay{ + Case: fpf, + delayDuration: clus.GetCaseDelayDuration(), } } } @@ -86,42 +86,45 @@ 
func failpointPaths(endpoint string) ([]string, error) { return fps, nil } -// failpoints follows FreeBSD KFAIL_POINT syntax. +// failpoints follows FreeBSD FAIL_POINT syntax. // e.g. panic("etcd-tester"),1*sleep(1000)->panic("etcd-tester") -func failuresFromFailpoint(fp string, failpointCommands []string) (fs []Failure) { +func casesFromFailpoint(fp string, failpointCommands []string) (fs []Case) { recov := makeRecoverFailpoint(fp) for _, fcmd := range failpointCommands { inject := makeInjectFailpoint(fp, fcmd) - fs = append(fs, []Failure{ - &failureFollower{ - failureByFunc: failureByFunc{ + fs = append(fs, []Case{ + &caseFollower{ + caseByFunc: caseByFunc{ desc: fmt.Sprintf("failpoint %q (one: %q)", fp, fcmd), - failureCase: rpcpb.FailureCase_FAILPOINTS, + rpcpbCase: rpcpb.Case_FAILPOINTS, injectMember: inject, recoverMember: recov, }, last: -1, lead: -1, }, - &failureLeader{ - failureByFunc: failureByFunc{ + &caseLeader{ + caseByFunc: caseByFunc{ desc: fmt.Sprintf("failpoint %q (leader: %q)", fp, fcmd), - failureCase: rpcpb.FailureCase_FAILPOINTS, + rpcpbCase: rpcpb.Case_FAILPOINTS, injectMember: inject, recoverMember: recov, }, last: -1, lead: -1, }, - &failureQuorum{ - desc: fmt.Sprintf("failpoint %q (quorum: %q)", fp, fcmd), - failureCase: rpcpb.FailureCase_FAILPOINTS, - injectMember: inject, - recoverMember: recov, + &caseQuorum{ + caseByFunc: caseByFunc{ + desc: fmt.Sprintf("failpoint %q (quorum: %q)", fp, fcmd), + rpcpbCase: rpcpb.Case_FAILPOINTS, + injectMember: inject, + recoverMember: recov, + }, + injected: make(map[int]struct{}), }, - &failureAll{ + &caseAll{ desc: fmt.Sprintf("failpoint %q (all: %q)", fp, fcmd), - failureCase: rpcpb.FailureCase_FAILPOINTS, + rpcpbCase: rpcpb.Case_FAILPOINTS, injectMember: inject, recoverMember: recov, }, diff --git a/functional/tester/case_network_blackhole.go b/functional/tester/case_network_blackhole.go new file mode 100644 index 00000000000..0d496eade4f --- /dev/null +++ b/functional/tester/case_network_blackhole.go @@ -0,0 +1,104 @@ +// Copyright 2018 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
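The "gofail" commands above are armed over each member's failpoint HTTP listener; a rough sketch of doing that by hand, hedged in that the exact status handling and endpoint layout are assumptions rather than the tester's literal code:

package main

import (
	"fmt"
	"net/http"
	"strings"
)

// putFailpoint enables one failpoint with a gofail command, e.g.
// putFailpoint(addr, fp, `panic("etcd-tester")`), where "addr" is a
// member's failpoint-http-addr and "fp" a path from failpointPaths.
func putFailpoint(addr, fp, cmd string) error {
	req, err := http.NewRequest(http.MethodPut, addr+"/"+fp, strings.NewReader(cmd))
	if err != nil {
		return err
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode/100 != 2 {
		return fmt.Errorf("failpoint %q not set: %s", fp, resp.Status)
	}
	return nil
}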
+ +package tester + +import "github.com/coreos/etcd/functional/rpcpb" + +func inject_BLACKHOLE_PEER_PORT_TX_RX(clus *Cluster, idx int) error { + return clus.sendOp(idx, rpcpb.Operation_BLACKHOLE_PEER_PORT_TX_RX) +} + +func recover_BLACKHOLE_PEER_PORT_TX_RX(clus *Cluster, idx int) error { + return clus.sendOp(idx, rpcpb.Operation_UNBLACKHOLE_PEER_PORT_TX_RX) +} + +func new_Case_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER(clus *Cluster) Case { + cc := caseByFunc{ + rpcpbCase: rpcpb.Case_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER, + injectMember: inject_BLACKHOLE_PEER_PORT_TX_RX, + recoverMember: recover_BLACKHOLE_PEER_PORT_TX_RX, + } + c := &caseFollower{cc, -1, -1} + return &caseDelay{ + Case: c, + delayDuration: clus.GetCaseDelayDuration(), + } +} + +func new_Case_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT() Case { + cc := caseByFunc{ + rpcpbCase: rpcpb.Case_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT, + injectMember: inject_BLACKHOLE_PEER_PORT_TX_RX, + recoverMember: recover_BLACKHOLE_PEER_PORT_TX_RX, + } + c := &caseFollower{cc, -1, -1} + return &caseUntilSnapshot{ + rpcpbCase: rpcpb.Case_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT, + Case: c, + } +} + +func new_Case_BLACKHOLE_PEER_PORT_TX_RX_LEADER(clus *Cluster) Case { + cc := caseByFunc{ + rpcpbCase: rpcpb.Case_BLACKHOLE_PEER_PORT_TX_RX_LEADER, + injectMember: inject_BLACKHOLE_PEER_PORT_TX_RX, + recoverMember: recover_BLACKHOLE_PEER_PORT_TX_RX, + } + c := &caseLeader{cc, -1, -1} + return &caseDelay{ + Case: c, + delayDuration: clus.GetCaseDelayDuration(), + } +} + +func new_Case_BLACKHOLE_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT() Case { + cc := caseByFunc{ + rpcpbCase: rpcpb.Case_BLACKHOLE_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT, + injectMember: inject_BLACKHOLE_PEER_PORT_TX_RX, + recoverMember: recover_BLACKHOLE_PEER_PORT_TX_RX, + } + c := &caseLeader{cc, -1, -1} + return &caseUntilSnapshot{ + rpcpbCase: rpcpb.Case_BLACKHOLE_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT, + Case: c, + } +} + +func new_Case_BLACKHOLE_PEER_PORT_TX_RX_QUORUM(clus *Cluster) Case { + c := &caseQuorum{ + caseByFunc: caseByFunc{ + rpcpbCase: rpcpb.Case_BLACKHOLE_PEER_PORT_TX_RX_QUORUM, + injectMember: inject_BLACKHOLE_PEER_PORT_TX_RX, + recoverMember: recover_BLACKHOLE_PEER_PORT_TX_RX, + }, + injected: make(map[int]struct{}), + } + return &caseDelay{ + Case: c, + delayDuration: clus.GetCaseDelayDuration(), + } +} + +func new_Case_BLACKHOLE_PEER_PORT_TX_RX_ALL(clus *Cluster) Case { + c := &caseAll{ + rpcpbCase: rpcpb.Case_BLACKHOLE_PEER_PORT_TX_RX_ALL, + injectMember: inject_BLACKHOLE_PEER_PORT_TX_RX, + recoverMember: recover_BLACKHOLE_PEER_PORT_TX_RX, + } + return &caseDelay{ + Case: c, + delayDuration: clus.GetCaseDelayDuration(), + } +} diff --git a/functional/tester/failure_case_network_delay.go b/functional/tester/case_network_delay.go similarity index 53% rename from functional/tester/failure_case_network_delay.go rename to functional/tester/case_network_delay.go index c2e067c46a7..39a4717020d 100644 --- a/functional/tester/failure_case_network_delay.go +++ b/functional/tester/case_network_delay.go @@ -46,108 +46,111 @@ func recover_DELAY_PEER_PORT_TX_RX(clus *Cluster, idx int) error { return err } -func new_FailureCase_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER(clus *Cluster, random bool) Failure { - ff := failureByFunc{ - failureCase: rpcpb.FailureCase_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER, +func new_Case_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER(clus *Cluster, random bool) Case { + cc := caseByFunc{ + 
rpcpbCase: rpcpb.Case_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER, injectMember: inject_DELAY_PEER_PORT_TX_RX, recoverMember: recover_DELAY_PEER_PORT_TX_RX, } clus.Tester.UpdatedDelayLatencyMs = clus.Tester.DelayLatencyMs if random { clus.UpdateDelayLatencyMs() - ff.failureCase = rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER + cc.rpcpbCase = rpcpb.Case_RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER } - f := &failureFollower{ff, -1, -1} - return &failureDelay{ - Failure: f, - delayDuration: clus.GetFailureDelayDuration(), + c := &caseFollower{cc, -1, -1} + return &caseDelay{ + Case: c, + delayDuration: clus.GetCaseDelayDuration(), } } -func new_FailureCase_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT(clus *Cluster, random bool) Failure { - ff := failureByFunc{ - failureCase: rpcpb.FailureCase_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT, +func new_Case_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT(clus *Cluster, random bool) Case { + cc := caseByFunc{ + rpcpbCase: rpcpb.Case_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT, injectMember: inject_DELAY_PEER_PORT_TX_RX, recoverMember: recover_DELAY_PEER_PORT_TX_RX, } clus.Tester.UpdatedDelayLatencyMs = clus.Tester.DelayLatencyMs if random { clus.UpdateDelayLatencyMs() - ff.failureCase = rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT + cc.rpcpbCase = rpcpb.Case_RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT } - f := &failureFollower{ff, -1, -1} - return &failureUntilSnapshot{ - failureCase: ff.failureCase, - Failure: f, + c := &caseFollower{cc, -1, -1} + return &caseUntilSnapshot{ + rpcpbCase: cc.rpcpbCase, + Case: c, } } -func new_FailureCase_DELAY_PEER_PORT_TX_RX_LEADER(clus *Cluster, random bool) Failure { - ff := failureByFunc{ - failureCase: rpcpb.FailureCase_DELAY_PEER_PORT_TX_RX_LEADER, +func new_Case_DELAY_PEER_PORT_TX_RX_LEADER(clus *Cluster, random bool) Case { + cc := caseByFunc{ + rpcpbCase: rpcpb.Case_DELAY_PEER_PORT_TX_RX_LEADER, injectMember: inject_DELAY_PEER_PORT_TX_RX, recoverMember: recover_DELAY_PEER_PORT_TX_RX, } clus.Tester.UpdatedDelayLatencyMs = clus.Tester.DelayLatencyMs if random { clus.UpdateDelayLatencyMs() - ff.failureCase = rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_LEADER + cc.rpcpbCase = rpcpb.Case_RANDOM_DELAY_PEER_PORT_TX_RX_LEADER } - f := &failureLeader{ff, -1, -1} - return &failureDelay{ - Failure: f, - delayDuration: clus.GetFailureDelayDuration(), + c := &caseLeader{cc, -1, -1} + return &caseDelay{ + Case: c, + delayDuration: clus.GetCaseDelayDuration(), } } -func new_FailureCase_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT(clus *Cluster, random bool) Failure { - ff := failureByFunc{ - failureCase: rpcpb.FailureCase_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT, +func new_Case_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT(clus *Cluster, random bool) Case { + cc := caseByFunc{ + rpcpbCase: rpcpb.Case_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT, injectMember: inject_DELAY_PEER_PORT_TX_RX, recoverMember: recover_DELAY_PEER_PORT_TX_RX, } clus.Tester.UpdatedDelayLatencyMs = clus.Tester.DelayLatencyMs if random { clus.UpdateDelayLatencyMs() - ff.failureCase = rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT + cc.rpcpbCase = rpcpb.Case_RANDOM_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT } - f := &failureLeader{ff, -1, -1} - return &failureUntilSnapshot{ - failureCase: ff.failureCase, - Failure: f, + c := &caseLeader{cc, -1, -1} + return 
&caseUntilSnapshot{ + rpcpbCase: cc.rpcpbCase, + Case: c, } } -func new_FailureCase_DELAY_PEER_PORT_TX_RX_QUORUM(clus *Cluster, random bool) Failure { - f := &failureQuorum{ - failureCase: rpcpb.FailureCase_DELAY_PEER_PORT_TX_RX_QUORUM, - injectMember: inject_DELAY_PEER_PORT_TX_RX, - recoverMember: recover_DELAY_PEER_PORT_TX_RX, +func new_Case_DELAY_PEER_PORT_TX_RX_QUORUM(clus *Cluster, random bool) Case { + c := &caseQuorum{ + caseByFunc: caseByFunc{ + rpcpbCase: rpcpb.Case_DELAY_PEER_PORT_TX_RX_QUORUM, + injectMember: inject_DELAY_PEER_PORT_TX_RX, + recoverMember: recover_DELAY_PEER_PORT_TX_RX, + }, + injected: make(map[int]struct{}), } clus.Tester.UpdatedDelayLatencyMs = clus.Tester.DelayLatencyMs if random { clus.UpdateDelayLatencyMs() - f.failureCase = rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_QUORUM + c.rpcpbCase = rpcpb.Case_RANDOM_DELAY_PEER_PORT_TX_RX_QUORUM } - return &failureDelay{ - Failure: f, - delayDuration: clus.GetFailureDelayDuration(), + return &caseDelay{ + Case: c, + delayDuration: clus.GetCaseDelayDuration(), } } -func new_FailureCase_DELAY_PEER_PORT_TX_RX_ALL(clus *Cluster, random bool) Failure { - f := &failureAll{ - failureCase: rpcpb.FailureCase_DELAY_PEER_PORT_TX_RX_ALL, +func new_Case_DELAY_PEER_PORT_TX_RX_ALL(clus *Cluster, random bool) Case { + c := &caseAll{ + rpcpbCase: rpcpb.Case_DELAY_PEER_PORT_TX_RX_ALL, injectMember: inject_DELAY_PEER_PORT_TX_RX, recoverMember: recover_DELAY_PEER_PORT_TX_RX, } clus.Tester.UpdatedDelayLatencyMs = clus.Tester.DelayLatencyMs if random { clus.UpdateDelayLatencyMs() - f.failureCase = rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_ALL + c.rpcpbCase = rpcpb.Case_RANDOM_DELAY_PEER_PORT_TX_RX_ALL } - return &failureDelay{ - Failure: f, - delayDuration: clus.GetFailureDelayDuration(), + return &caseDelay{ + Case: c, + delayDuration: clus.GetCaseDelayDuration(), } } diff --git a/functional/tester/case_no_fail.go b/functional/tester/case_no_fail.go new file mode 100644 index 00000000000..e85bef93c44 --- /dev/null +++ b/functional/tester/case_no_fail.go @@ -0,0 +1,99 @@ +// Copyright 2018 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
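The constructors above all follow one composition pattern: a caseByFunc supplies the per-member inject/recover primitives, a role wrapper (caseFollower, caseLeader, caseQuorum, caseAll) picks the targets, and caseDelay or caseUntilSnapshot adds the pacing. A sketch of a hypothetical case wired through the same layers, reusing existing rpcpb values purely for illustration:

// new_Case_EXAMPLE is not part of this patch; it shows how another
// SIGTERM-style case would be assembled from the same building blocks.
func new_Case_EXAMPLE(clus *Cluster) Case {
	cc := caseByFunc{
		rpcpbCase: rpcpb.Case_SIGTERM_ONE_FOLLOWER, // existing enum, for the sketch
		injectMember: func(clus *Cluster, idx int) error {
			return clus.sendOp(idx, rpcpb.Operation_SIGTERM_ETCD)
		},
		recoverMember: func(clus *Cluster, idx int) error {
			return clus.sendOp(idx, rpcpb.Operation_RESTART_ETCD)
		},
	}
	c := &caseFollower{cc, -1, -1} // rotate the target over followers
	return &caseDelay{             // then wait "case-delay-ms" after injection
		Case:          c,
		delayDuration: clus.GetCaseDelayDuration(),
	}
}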
+ +package tester + +import ( + "time" + + "github.com/coreos/etcd/functional/rpcpb" + + "go.uber.org/zap" +) + +type caseNoFailWithStress caseByFunc + +func (c *caseNoFailWithStress) Inject(clus *Cluster) error { + return nil +} + +func (c *caseNoFailWithStress) Recover(clus *Cluster) error { + return nil +} + +func (c *caseNoFailWithStress) Desc() string { + if c.desc != "" { + return c.desc + } + return c.rpcpbCase.String() +} + +func (c *caseNoFailWithStress) TestCase() rpcpb.Case { + return c.rpcpbCase +} + +func new_Case_NO_FAIL_WITH_STRESS(clus *Cluster) Case { + c := &caseNoFailWithStress{ + rpcpbCase: rpcpb.Case_NO_FAIL_WITH_STRESS, + } + return &caseDelay{ + Case: c, + delayDuration: clus.GetCaseDelayDuration(), + } +} + +type caseNoFailWithNoStressForLiveness caseByFunc + +func (c *caseNoFailWithNoStressForLiveness) Inject(clus *Cluster) error { + clus.lg.Info( + "extra delay for liveness mode with no stresser", + zap.Int("round", clus.rd), + zap.Int("case", clus.cs), + zap.String("desc", c.Desc()), + ) + time.Sleep(clus.GetCaseDelayDuration()) + + clus.lg.Info( + "wait health in liveness mode", + zap.Int("round", clus.rd), + zap.Int("case", clus.cs), + zap.String("desc", c.Desc()), + ) + return clus.WaitHealth() +} + +func (c *caseNoFailWithNoStressForLiveness) Recover(clus *Cluster) error { + return nil +} + +func (c *caseNoFailWithNoStressForLiveness) Desc() string { + if c.desc != "" { + return c.desc + } + return c.rpcpbCase.String() +} + +func (c *caseNoFailWithNoStressForLiveness) TestCase() rpcpb.Case { + return c.rpcpbCase +} + +func new_Case_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS(clus *Cluster) Case { + c := &caseNoFailWithNoStressForLiveness{ + rpcpbCase: rpcpb.Case_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS, + } + return &caseDelay{ + Case: c, + delayDuration: clus.GetCaseDelayDuration(), + } +} diff --git a/functional/tester/failure_case_sigquit_remove.go b/functional/tester/case_sigquit_remove.go similarity index 82% rename from functional/tester/failure_case_sigquit_remove.go rename to functional/tester/case_sigquit_remove.go index 270b2d81f7c..13fe68f4e14 100644 --- a/functional/tester/failure_case_sigquit_remove.go +++ b/functional/tester/case_sigquit_remove.go @@ -174,43 +174,43 @@ func recover_SIGQUIT_ETCD_AND_REMOVE_DATA(clus *Cluster, idx1 int) error { return err } -func new_FailureCase_SIGQUIT_AND_REMOVE_ONE_FOLLOWER(clus *Cluster) Failure { - ff := failureByFunc{ - failureCase: rpcpb.FailureCase_SIGQUIT_AND_REMOVE_ONE_FOLLOWER, +func new_Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER(clus *Cluster) Case { + cc := caseByFunc{ + rpcpbCase: rpcpb.Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER, injectMember: inject_SIGQUIT_ETCD_AND_REMOVE_DATA, recoverMember: recover_SIGQUIT_ETCD_AND_REMOVE_DATA, } - f := &failureFollower{ff, -1, -1} - return &failureDelay{ - Failure: f, - delayDuration: clus.GetFailureDelayDuration(), + c := &caseFollower{cc, -1, -1} + return &caseDelay{ + Case: c, + delayDuration: clus.GetCaseDelayDuration(), } } -func new_FailureCase_SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT(clus *Cluster) Failure { - return &failureUntilSnapshot{ - failureCase: rpcpb.FailureCase_SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT, - Failure: new_FailureCase_SIGQUIT_AND_REMOVE_ONE_FOLLOWER(clus), +func new_Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT(clus *Cluster) Case { + return &caseUntilSnapshot{ + rpcpbCase: rpcpb.Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT, + Case: new_Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER(clus), } } -func 
new_FailureCase_SIGQUIT_AND_REMOVE_LEADER(clus *Cluster) Failure { - ff := failureByFunc{ - failureCase: rpcpb.FailureCase_SIGQUIT_AND_REMOVE_LEADER, +func new_Case_SIGQUIT_AND_REMOVE_LEADER(clus *Cluster) Case { + cc := caseByFunc{ + rpcpbCase: rpcpb.Case_SIGQUIT_AND_REMOVE_LEADER, injectMember: inject_SIGQUIT_ETCD_AND_REMOVE_DATA, recoverMember: recover_SIGQUIT_ETCD_AND_REMOVE_DATA, } - f := &failureLeader{ff, -1, -1} - return &failureDelay{ - Failure: f, - delayDuration: clus.GetFailureDelayDuration(), + c := &caseLeader{cc, -1, -1} + return &caseDelay{ + Case: c, + delayDuration: clus.GetCaseDelayDuration(), } } -func new_FailureCase_SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT(clus *Cluster) Failure { - return &failureUntilSnapshot{ - failureCase: rpcpb.FailureCase_SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT, - Failure: new_FailureCase_SIGQUIT_AND_REMOVE_LEADER(clus), +func new_Case_SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT(clus *Cluster) Case { + return &caseUntilSnapshot{ + rpcpbCase: rpcpb.Case_SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT, + Case: new_Case_SIGQUIT_AND_REMOVE_LEADER(clus), } } diff --git a/functional/tester/case_sigquit_remove_quorum.go b/functional/tester/case_sigquit_remove_quorum.go new file mode 100644 index 00000000000..9653de10dbf --- /dev/null +++ b/functional/tester/case_sigquit_remove_quorum.go @@ -0,0 +1,275 @@ +// Copyright 2018 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tester + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/coreos/etcd/clientv3" + "github.com/coreos/etcd/functional/rpcpb" + + "go.uber.org/zap" +) + +type fetchSnapshotCaseQuorum struct { + desc string + rpcpbCase rpcpb.Case + injected map[int]struct{} + snapshotted int +} + +func (c *fetchSnapshotCaseQuorum) Inject(clus *Cluster) error { + // 1. Assume node C is the current leader with most up-to-date data. + lead, err := clus.GetLeader() + if err != nil { + return err + } + c.snapshotted = lead + + // 2. Download snapshot from node C, before destroying node A and B. 
+ clus.lg.Info( + "save snapshot on leader node START", + zap.String("target-endpoint", clus.Members[lead].EtcdClientEndpoint), + ) + var resp *rpcpb.Response + resp, err = clus.sendOpWithResp(lead, rpcpb.Operation_SAVE_SNAPSHOT) + if resp == nil || (resp != nil && !resp.Success) || err != nil { + clus.lg.Info( + "save snapshot on leader node FAIL", + zap.String("target-endpoint", clus.Members[lead].EtcdClientEndpoint), + zap.Error(err), + ) + return err + } + clus.lg.Info( + "save snapshot on leader node SUCCESS", + zap.String("target-endpoint", clus.Members[lead].EtcdClientEndpoint), + zap.String("member-name", resp.SnapshotInfo.MemberName), + zap.Strings("member-client-urls", resp.SnapshotInfo.MemberClientURLs), + zap.String("snapshot-path", resp.SnapshotInfo.SnapshotPath), + zap.String("snapshot-file-size", resp.SnapshotInfo.SnapshotFileSize), + zap.String("snapshot-total-size", resp.SnapshotInfo.SnapshotTotalSize), + zap.Int64("snapshot-total-key", resp.SnapshotInfo.SnapshotTotalKey), + zap.Int64("snapshot-hash", resp.SnapshotInfo.SnapshotHash), + zap.Int64("snapshot-revision", resp.SnapshotInfo.SnapshotRevision), + zap.String("took", resp.SnapshotInfo.Took), + zap.Error(err), + ) + if err != nil { + return err + } + clus.Members[lead].SnapshotInfo = resp.SnapshotInfo + + leaderc, err := clus.Members[lead].CreateEtcdClient() + if err != nil { + return err + } + defer leaderc.Close() + var mresp *clientv3.MemberListResponse + mresp, err = leaderc.MemberList(context.Background()) + mss := []string{} + if err == nil && mresp != nil { + mss = describeMembers(mresp) + } + clus.lg.Info( + "member list before disastrous machine failure", + zap.String("request-to", clus.Members[lead].EtcdClientEndpoint), + zap.Strings("members", mss), + zap.Error(err), + ) + if err != nil { + return err + } + + // simulate real life; machine failures may happen + // after some time since last snapshot save + time.Sleep(time.Second) + + // 3. Destroy node A and B, and make the whole cluster inoperable. + for { + c.injected = pickQuorum(len(clus.Members)) + if _, ok := c.injected[lead]; !ok { + break + } + } + for idx := range c.injected { + clus.lg.Info( + "disastrous machine failure to quorum START", + zap.String("target-endpoint", clus.Members[idx].EtcdClientEndpoint), + ) + err = clus.sendOp(idx, rpcpb.Operation_SIGQUIT_ETCD_AND_REMOVE_DATA) + clus.lg.Info( + "disastrous machine failure to quorum END", + zap.String("target-endpoint", clus.Members[idx].EtcdClientEndpoint), + zap.Error(err), + ) + if err != nil { + return err + } + } + + // 4. Now node C cannot operate either. + // 5. SIGTERM node C and remove its data directories. + clus.lg.Info( + "disastrous machine failure to old leader START", + zap.String("target-endpoint", clus.Members[lead].EtcdClientEndpoint), + ) + err = clus.sendOp(lead, rpcpb.Operation_SIGQUIT_ETCD_AND_REMOVE_DATA) + clus.lg.Info( + "disastrous machine failure to old leader END", + zap.String("target-endpoint", clus.Members[lead].EtcdClientEndpoint), + zap.Error(err), + ) + return err +} + +func (c *fetchSnapshotCaseQuorum) Recover(clus *Cluster) error { + // 6. Restore a new seed member from node C's latest snapshot file. 
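Step 6 reduces to restarting the old leader as a single-member cluster assembled from its own advertised peer URLs, which Recover computes next; a standalone sketch of just that string construction (helper name and URL are illustrative):

package main

import (
	"fmt"
	"strings"
)

// seedInitialCluster builds the initial-cluster flag value for a seed
// member that restarts alone, e.g. "s1=https://127.0.0.1:13800".
func seedInitialCluster(name string, peerURLs []string) string {
	ss := make([]string, 0, len(peerURLs))
	for _, u := range peerURLs {
		ss = append(ss, fmt.Sprintf("%s=%s", name, u))
	}
	return strings.Join(ss, ",")
}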
+ oldlead := c.snapshotted + + // configuration on restart from recovered snapshot + // seed member's configuration is all the same as previous one + // except initial cluster string is now a single-node cluster + clus.Members[oldlead].EtcdOnSnapshotRestore = clus.Members[oldlead].Etcd + clus.Members[oldlead].EtcdOnSnapshotRestore.InitialClusterState = "existing" + name := clus.Members[oldlead].Etcd.Name + initClus := []string{} + for _, u := range clus.Members[oldlead].Etcd.AdvertisePeerURLs { + initClus = append(initClus, fmt.Sprintf("%s=%s", name, u)) + } + clus.Members[oldlead].EtcdOnSnapshotRestore.InitialCluster = strings.Join(initClus, ",") + + clus.lg.Info( + "restore snapshot and restart from snapshot request START", + zap.String("target-endpoint", clus.Members[oldlead].EtcdClientEndpoint), + zap.Strings("initial-cluster", initClus), + ) + err := clus.sendOp(oldlead, rpcpb.Operation_RESTORE_RESTART_FROM_SNAPSHOT) + clus.lg.Info( + "restore snapshot and restart from snapshot request END", + zap.String("target-endpoint", clus.Members[oldlead].EtcdClientEndpoint), + zap.Strings("initial-cluster", initClus), + zap.Error(err), + ) + if err != nil { + return err + } + + leaderc, err := clus.Members[oldlead].CreateEtcdClient() + if err != nil { + return err + } + defer leaderc.Close() + + // 7. Add another member to establish 2-node cluster. + // 8. Add another member to establish 3-node cluster. + // 9. Add more if any. + idxs := make([]int, 0, len(c.injected)) + for idx := range c.injected { + idxs = append(idxs, idx) + } + clus.lg.Info("member add START", zap.Int("members-to-add", len(idxs))) + for i, idx := range idxs { + clus.lg.Info( + "member add request SENT", + zap.String("target-endpoint", clus.Members[idx].EtcdClientEndpoint), + zap.Strings("peer-urls", clus.Members[idx].Etcd.AdvertisePeerURLs), + ) + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + _, err := leaderc.MemberAdd(ctx, clus.Members[idx].Etcd.AdvertisePeerURLs) + cancel() + clus.lg.Info( + "member add request DONE", + zap.String("target-endpoint", clus.Members[idx].EtcdClientEndpoint), + zap.Strings("peer-urls", clus.Members[idx].Etcd.AdvertisePeerURLs), + zap.Error(err), + ) + if err != nil { + return err + } + + // start the added(new) member with fresh data + clus.Members[idx].EtcdOnSnapshotRestore = clus.Members[idx].Etcd + clus.Members[idx].EtcdOnSnapshotRestore.InitialClusterState = "existing" + name := clus.Members[idx].Etcd.Name + for _, u := range clus.Members[idx].Etcd.AdvertisePeerURLs { + initClus = append(initClus, fmt.Sprintf("%s=%s", name, u)) + } + clus.Members[idx].EtcdOnSnapshotRestore.InitialCluster = strings.Join(initClus, ",") + clus.lg.Info( + "restart from snapshot request SENT", + zap.String("target-endpoint", clus.Members[idx].EtcdClientEndpoint), + zap.Strings("initial-cluster", initClus), + ) + err = clus.sendOp(idx, rpcpb.Operation_RESTART_FROM_SNAPSHOT) + clus.lg.Info( + "restart from snapshot request DONE", + zap.String("target-endpoint", clus.Members[idx].EtcdClientEndpoint), + zap.Strings("initial-cluster", initClus), + zap.Error(err), + ) + if err != nil { + return err + } + + if i != len(c.injected)-1 { + // wait until membership reconfiguration entry gets applied + // TODO: test concurrent member add + dur := 5 * clus.Members[idx].ElectionTimeout() + clus.lg.Info( + "waiting after restart from snapshot request", + zap.Int("i", i), + zap.Int("idx", idx), + zap.Duration("sleep", dur), + ) + time.Sleep(dur) + } else { + clus.lg.Info( + "restart from snapshot 
request ALL END", + zap.Int("i", i), + zap.Int("idx", idx), + ) + } + } + return nil +} + +func (c *fetchSnapshotCaseQuorum) Desc() string { + if c.desc != "" { + return c.desc + } + return c.rpcpbCase.String() +} + +func (c *fetchSnapshotCaseQuorum) TestCase() rpcpb.Case { + return c.rpcpbCase +} + +func new_Case_SIGQUIT_AND_REMOVE_QUORUM_AND_RESTORE_LEADER_SNAPSHOT_FROM_SCRATCH(clus *Cluster) Case { + c := &fetchSnapshotCaseQuorum{ + rpcpbCase: rpcpb.Case_SIGQUIT_AND_REMOVE_QUORUM_AND_RESTORE_LEADER_SNAPSHOT_FROM_SCRATCH, + injected: make(map[int]struct{}), + snapshotted: -1, + } + // simulate real life; machine replacements may happen + // after some time since disaster + return &caseDelay{ + Case: c, + delayDuration: clus.GetCaseDelayDuration(), + } +} diff --git a/functional/tester/case_sigterm.go b/functional/tester/case_sigterm.go new file mode 100644 index 00000000000..f5d472afcdc --- /dev/null +++ b/functional/tester/case_sigterm.go @@ -0,0 +1,92 @@ +// Copyright 2018 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tester + +import "github.com/coreos/etcd/functional/rpcpb" + +func inject_SIGTERM_ETCD(clus *Cluster, idx int) error { + return clus.sendOp(idx, rpcpb.Operation_SIGTERM_ETCD) +} + +func recover_SIGTERM_ETCD(clus *Cluster, idx int) error { + return clus.sendOp(idx, rpcpb.Operation_RESTART_ETCD) +} + +func new_Case_SIGTERM_ONE_FOLLOWER(clus *Cluster) Case { + cc := caseByFunc{ + rpcpbCase: rpcpb.Case_SIGTERM_ONE_FOLLOWER, + injectMember: inject_SIGTERM_ETCD, + recoverMember: recover_SIGTERM_ETCD, + } + c := &caseFollower{cc, -1, -1} + return &caseDelay{ + Case: c, + delayDuration: clus.GetCaseDelayDuration(), + } +} + +func new_Case_SIGTERM_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT(clus *Cluster) Case { + return &caseUntilSnapshot{ + rpcpbCase: rpcpb.Case_SIGTERM_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT, + Case: new_Case_SIGTERM_ONE_FOLLOWER(clus), + } +} + +func new_Case_SIGTERM_LEADER(clus *Cluster) Case { + cc := caseByFunc{ + rpcpbCase: rpcpb.Case_SIGTERM_LEADER, + injectMember: inject_SIGTERM_ETCD, + recoverMember: recover_SIGTERM_ETCD, + } + c := &caseLeader{cc, -1, -1} + return &caseDelay{ + Case: c, + delayDuration: clus.GetCaseDelayDuration(), + } +} + +func new_Case_SIGTERM_LEADER_UNTIL_TRIGGER_SNAPSHOT(clus *Cluster) Case { + return &caseUntilSnapshot{ + rpcpbCase: rpcpb.Case_SIGTERM_LEADER_UNTIL_TRIGGER_SNAPSHOT, + Case: new_Case_SIGTERM_LEADER(clus), + } +} + +func new_Case_SIGTERM_QUORUM(clus *Cluster) Case { + c := &caseQuorum{ + caseByFunc: caseByFunc{ + rpcpbCase: rpcpb.Case_SIGTERM_QUORUM, + injectMember: inject_SIGTERM_ETCD, + recoverMember: recover_SIGTERM_ETCD, + }, + injected: make(map[int]struct{}), + } + return &caseDelay{ + Case: c, + delayDuration: clus.GetCaseDelayDuration(), + } +} + +func new_Case_SIGTERM_ALL(clus *Cluster) Case { + c := &caseAll{ + rpcpbCase: rpcpb.Case_SIGTERM_ALL, + injectMember: inject_SIGTERM_ETCD, + recoverMember: recover_SIGTERM_ETCD, + } + return &caseDelay{ + Case: c, + delayDuration: 
clus.GetCaseDelayDuration(), + } +} diff --git a/functional/tester/checker.go b/functional/tester/checker.go new file mode 100644 index 00000000000..48e98cb0db8 --- /dev/null +++ b/functional/tester/checker.go @@ -0,0 +1,28 @@ +// Copyright 2018 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tester + +import "github.com/coreos/etcd/functional/rpcpb" + +// Checker checks cluster consistency. +type Checker interface { + // Type returns the checker type. + Type() rpcpb.Checker + // EtcdClientEndpoints returns the client endpoints of + // all checker target nodes. + EtcdClientEndpoints() []string + // Check returns an error if the system fails a consistency check. + Check() error +} diff --git a/functional/tester/checker_kv_hash.go b/functional/tester/checker_kv_hash.go new file mode 100644 index 00000000000..586ad89bd13 --- /dev/null +++ b/functional/tester/checker_kv_hash.go @@ -0,0 +1,89 @@ +// Copyright 2018 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tester + +import ( + "fmt" + "time" + + "github.com/coreos/etcd/functional/rpcpb" + + "go.uber.org/zap" +) + +const retries = 7 + +type kvHashChecker struct { + ctype rpcpb.Checker + clus *Cluster +} + +func newKVHashChecker(clus *Cluster) Checker { + return &kvHashChecker{ + ctype: rpcpb.Checker_KV_HASH, + clus: clus, + } +} + +func (hc *kvHashChecker) checkRevAndHashes() (err error) { + var ( + revs map[string]int64 + hashes map[string]int64 + ) + // retries in case of transient failures or if the etcd cluster has not stabilized yet.
+ for i := 0; i < retries; i++ { + revs, hashes, err = hc.clus.getRevisionHash() + if err != nil { + hc.clus.lg.Warn( + "failed to get revision and hash", + zap.Int("retries", i), + zap.Error(err), + ) + } else { + sameRev := getSameValue(revs) + sameHashes := getSameValue(hashes) + if sameRev && sameHashes { + return nil + } + hc.clus.lg.Warn( + "retrying; etcd cluster is not stable", + zap.Int("retries", i), + zap.Bool("same-revisions", sameRev), + zap.Bool("same-hashes", sameHashes), + zap.String("revisions", fmt.Sprintf("%+v", revs)), + zap.String("hashes", fmt.Sprintf("%+v", hashes)), + ) + } + time.Sleep(time.Second) + } + + if err != nil { + return fmt.Errorf("failed revision and hash check (%v)", err) + } + + return fmt.Errorf("etcd cluster is not stable: [revisions: %v] and [hashes: %v]", revs, hashes) +} + +func (hc *kvHashChecker) Type() rpcpb.Checker { + return hc.ctype +} + +func (hc *kvHashChecker) EtcdClientEndpoints() []string { + return hc.clus.EtcdClientEndpoints() +} + +func (hc *kvHashChecker) Check() error { + return hc.checkRevAndHashes() +} diff --git a/functional/tester/checks.go b/functional/tester/checker_lease_expire.go similarity index 61% rename from functional/tester/checks.go rename to functional/tester/checker_lease_expire.go index d628e218f9f..a897421288c 100644 --- a/functional/tester/checks.go +++ b/functional/tester/checker_lease_expire.go @@ -27,83 +27,32 @@ import ( "google.golang.org/grpc" ) -const retries = 7 - -// Checker checks cluster consistency. -type Checker interface { - // Check returns an error if the system fails a consistency check. - Check() error -} - -type hashAndRevGetter interface { - getRevisionHash() (revs map[string]int64, hashes map[string]int64, err error) -} - -type hashChecker struct { - lg *zap.Logger - hrg hashAndRevGetter -} - -func newHashChecker(lg *zap.Logger, hrg hashAndRevGetter) Checker { - return &hashChecker{ - lg: lg, - hrg: hrg, - } +type leaseExpireChecker struct { + ctype rpcpb.Checker + lg *zap.Logger + m *rpcpb.Member + ls *leaseStresser + cli *clientv3.Client } -const leaseCheckerTimeout = 10 * time.Second - -func (hc *hashChecker) checkRevAndHashes() (err error) { - var ( - revs map[string]int64 - hashes map[string]int64 - ) - // retries in case of transient failure or etcd cluster has not stablized yet. 
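Editor's note: getSameValue, used in the kvHashChecker retry loop above, lives elsewhere in the tester package and is only called here. A plausible (hypothetical) sketch of it:

package tester

// getSameValue reports whether every member returned the same
// revision (or hash) value.
func getSameValue(vals map[string]int64) bool {
	var prev int64
	seen := false
	for _, v := range vals {
		if seen && v != prev {
			return false
		}
		prev, seen = v, true
	}
	return true
}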
- for i := 0; i < retries; i++ { - revs, hashes, err = hc.hrg.getRevisionHash() - if err != nil { - hc.lg.Warn( - "failed to get revision and hash", - zap.Int("retries", i), - zap.Error(err), - ) - } else { - sameRev := getSameValue(revs) - sameHashes := getSameValue(hashes) - if sameRev && sameHashes { - return nil - } - hc.lg.Warn( - "retrying; etcd cluster is not stable", - zap.Int("retries", i), - zap.Bool("same-revisions", sameRev), - zap.Bool("same-hashes", sameHashes), - zap.String("revisions", fmt.Sprintf("%+v", revs)), - zap.String("hashes", fmt.Sprintf("%+v", hashes)), - ) - } - time.Sleep(time.Second) - } - - if err != nil { - return fmt.Errorf("failed revision and hash check (%v)", err) +func newLeaseExpireChecker(ls *leaseStresser) Checker { + return &leaseExpireChecker{ + ctype: rpcpb.Checker_LEASE_EXPIRE, + lg: ls.lg, + m: ls.m, + ls: ls, } - - return fmt.Errorf("etcd cluster is not stable: [revisions: %v] and [hashes: %v]", revs, hashes) } -func (hc *hashChecker) Check() error { - return hc.checkRevAndHashes() +func (lc *leaseExpireChecker) Type() rpcpb.Checker { + return lc.ctype } -type leaseChecker struct { - lg *zap.Logger - m *rpcpb.Member - ls *leaseStresser - cli *clientv3.Client +func (lc *leaseExpireChecker) EtcdClientEndpoints() []string { + return []string{lc.m.EtcdClientEndpoint} } -func (lc *leaseChecker) Check() error { +func (lc *leaseExpireChecker) Check() error { if lc.ls == nil { return nil } @@ -134,9 +83,11 @@ func (lc *leaseChecker) Check() error { return lc.checkShortLivedLeases() } +const leaseExpireCheckerTimeout = 10 * time.Second + // checkShortLivedLeases ensures leases expire. -func (lc *leaseChecker) checkShortLivedLeases() error { - ctx, cancel := context.WithTimeout(context.Background(), leaseCheckerTimeout) +func (lc *leaseExpireChecker) checkShortLivedLeases() error { + ctx, cancel := context.WithTimeout(context.Background(), leaseExpireCheckerTimeout) errc := make(chan error) defer cancel() for leaseID := range lc.ls.shortLivedLeases.leases { @@ -154,7 +105,7 @@ func (lc *leaseChecker) checkShortLivedLeases() error { return errsToError(errs) } -func (lc *leaseChecker) checkShortLivedLease(ctx context.Context, leaseID int64) (err error) { +func (lc *leaseExpireChecker) checkShortLivedLease(ctx context.Context, leaseID int64) (err error) { // retry in case of transient failure or lease is expired but not yet revoked due to the fact that etcd cluster didn't have enought time to delete it. 
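Editor's note: errsToError, returned from checkShortLivedLeases above, is another helper defined outside this patch; presumably it folds the per-lease errors collected from the error channel into a single error (nil when the slice is empty). A hypothetical sketch:

package tester

import (
	"errors"
	"strings"
)

// errsToError joins a slice of errors into one error,
// returning nil when there are none.
func errsToError(errs []error) error {
	if len(errs) == 0 {
		return nil
	}
	msgs := make([]string, len(errs))
	for i, err := range errs {
		msgs[i] = err.Error()
	}
	return errors.New(strings.Join(msgs, "; "))
}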
var resp *clientv3.LeaseTimeToLiveResponse for i := 0; i < retries; i++ { @@ -199,7 +150,7 @@ func (lc *leaseChecker) checkShortLivedLease(ctx context.Context, leaseID int64) return err } -func (lc *leaseChecker) checkLease(ctx context.Context, expired bool, leaseID int64) error { +func (lc *leaseExpireChecker) checkLease(ctx context.Context, expired bool, leaseID int64) error { keysExpired, err := lc.hasKeysAttachedToLeaseExpired(ctx, leaseID) if err != nil { lc.lg.Warn( @@ -227,8 +178,8 @@ func (lc *leaseChecker) checkLease(ctx context.Context, expired bool, leaseID in return nil } -func (lc *leaseChecker) check(expired bool, leases map[int64]time.Time) error { - ctx, cancel := context.WithTimeout(context.Background(), leaseCheckerTimeout) +func (lc *leaseExpireChecker) check(expired bool, leases map[int64]time.Time) error { + ctx, cancel := context.WithTimeout(context.Background(), leaseExpireCheckerTimeout) defer cancel() for leaseID := range leases { if err := lc.checkLease(ctx, expired, leaseID); err != nil { @@ -239,7 +190,7 @@ func (lc *leaseChecker) check(expired bool, leases map[int64]time.Time) error { } // TODO: handle failures from "grpc.FailFast(false)" -func (lc *leaseChecker) getLeaseByID(ctx context.Context, leaseID int64) (*clientv3.LeaseTimeToLiveResponse, error) { +func (lc *leaseExpireChecker) getLeaseByID(ctx context.Context, leaseID int64) (*clientv3.LeaseTimeToLiveResponse, error) { return lc.cli.TimeToLive( ctx, clientv3.LeaseID(leaseID), @@ -247,7 +198,7 @@ func (lc *leaseChecker) getLeaseByID(ctx context.Context, leaseID int64) (*clien ) } -func (lc *leaseChecker) hasLeaseExpired(ctx context.Context, leaseID int64) (bool, error) { +func (lc *leaseExpireChecker) hasLeaseExpired(ctx context.Context, leaseID int64) (bool, error) { // keep retrying until lease's state is known or ctx is being canceled for ctx.Err() == nil { resp, err := lc.getLeaseByID(ctx, leaseID) @@ -272,7 +223,7 @@ func (lc *leaseChecker) hasLeaseExpired(ctx context.Context, leaseID int64) (boo // The keys attached to the lease have the format of "<leaseID>_<idx>", where idx reflects the order of key creation. // Since the key prefix contains the leaseID, fetching keys by the "<leaseID>" prefix // determines whether the attached keys for a given leaseID have been deleted or not -func (lc *leaseChecker) hasKeysAttachedToLeaseExpired(ctx context.Context, leaseID int64) (bool, error) { +func (lc *leaseExpireChecker) hasKeysAttachedToLeaseExpired(ctx context.Context, leaseID int64) (bool, error) { resp, err := lc.cli.Get(ctx, fmt.Sprintf("%d", leaseID), clientv3.WithPrefix()) if err != nil { lc.lg.Warn( @@ -285,42 +236,3 @@ func (lc *leaseChecker) hasKeysAttachedToLeaseExpired(ctx context.Context, lease } return len(resp.Kvs) == 0, nil } - -// compositeChecker implements a checker that runs a slice of Checkers concurrently.
-type compositeChecker struct{ checkers []Checker } - -func newCompositeChecker(checkers []Checker) Checker { - return &compositeChecker{checkers} -} - -func (cchecker *compositeChecker) Check() error { - errc := make(chan error) - for _, c := range cchecker.checkers { - go func(chk Checker) { errc <- chk.Check() }(c) - } - var errs []error - for range cchecker.checkers { - if err := <-errc; err != nil { - errs = append(errs, err) - } - } - return errsToError(errs) -} - -type runnerChecker struct { - errc chan error -} - -func (rc *runnerChecker) Check() error { - select { - case err := <-rc.errc: - return err - default: - return nil - } -} - -type noChecker struct{} - -func newNoChecker() Checker { return &noChecker{} } -func (nc *noChecker) Check() error { return nil } diff --git a/functional/tester/checker_no_check.go b/functional/tester/checker_no_check.go new file mode 100644 index 00000000000..d3670231988 --- /dev/null +++ b/functional/tester/checker_no_check.go @@ -0,0 +1,24 @@ +// Copyright 2018 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tester + +import "github.com/coreos/etcd/functional/rpcpb" + +type noCheck struct{} + +func newNoChecker() Checker { return &noCheck{} } +func (nc *noCheck) Type() rpcpb.Checker { return rpcpb.Checker_NO_CHECK } +func (nc *noCheck) EtcdClientEndpoints() []string { return nil } +func (nc *noCheck) Check() error { return nil } diff --git a/functional/tester/checker_runner.go b/functional/tester/checker_runner.go new file mode 100644 index 00000000000..a5b7ff4d15f --- /dev/null +++ b/functional/tester/checker_runner.go @@ -0,0 +1,48 @@ +// Copyright 2018 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tester + +import "github.com/coreos/etcd/functional/rpcpb" + +type runnerChecker struct { + ctype rpcpb.Checker + etcdClientEndpoint string + errc chan error +} + +func newRunnerChecker(ep string, errc chan error) Checker { + return &runnerChecker{ + ctype: rpcpb.Checker_RUNNER, + etcdClientEndpoint: ep, + errc: errc, + } +} + +func (rc *runnerChecker) Type() rpcpb.Checker { + return rc.ctype +} + +func (rc *runnerChecker) EtcdClientEndpoints() []string { + return []string{rc.etcdClientEndpoint} +} + +func (rc *runnerChecker) Check() error { + select { + case err := <-rc.errc: + return err + default: + return nil + } +} diff --git a/functional/tester/cluster.go b/functional/tester/cluster.go index c3e75dede45..b18084d48ef 100644 --- a/functional/tester/cluster.go +++ b/functional/tester/cluster.go @@ -52,11 +52,11 @@ type Cluster struct { Members []*rpcpb.Member `yaml:"agent-configs"` Tester *rpcpb.Tester `yaml:"tester-config"` - failures []Failure + cases []Case rateLimiter *rate.Limiter stresser Stresser - checker Checker + checkers []Checker currentRevision int64 rd int @@ -80,7 +80,7 @@ func NewCluster(lg *zap.Logger, fpath string) (*Cluster, error) { clus.agentClients = make([]rpcpb.TransportClient, len(clus.Members)) clus.agentStreams = make([]rpcpb.Transport_TransportClient, len(clus.Members)) clus.agentRequests = make([]*rpcpb.Request, len(clus.Members)) - clus.failures = make([]Failure, 0) + clus.cases = make([]Case, 0) for i, ap := range clus.Members { var err error @@ -111,18 +111,27 @@ func NewCluster(lg *zap.Logger, fpath string) (*Cluster, error) { } go clus.serveTesterServer() - clus.updateFailures() + clus.updateCases() clus.rateLimiter = rate.NewLimiter( rate.Limit(int(clus.Tester.StressQPS)), int(clus.Tester.StressQPS), ) - clus.updateStresserChecker() + clus.setStresserChecker() return clus, nil } +// EtcdClientEndpoints returns all etcd client endpoints. 
+func (clus *Cluster) EtcdClientEndpoints() (css []string) { + css = make([]string, len(clus.Members)) + for i := range clus.Members { + css[i] = clus.Members[i].EtcdClientEndpoint + } + return css +} + func (clus *Cluster) serveTesterServer() { clus.lg.Info( "started tester HTTP server", @@ -139,124 +148,127 @@ func (clus *Cluster) serveTesterServer() { } } -func (clus *Cluster) updateFailures() { - for _, cs := range clus.Tester.FailureCases { +func (clus *Cluster) updateCases() { + for _, cs := range clus.Tester.Cases { switch cs { case "SIGTERM_ONE_FOLLOWER": - clus.failures = append(clus.failures, - new_FailureCase_SIGTERM_ONE_FOLLOWER(clus)) + clus.cases = append(clus.cases, + new_Case_SIGTERM_ONE_FOLLOWER(clus)) case "SIGTERM_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT": - clus.failures = append(clus.failures, - new_FailureCase_SIGTERM_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT(clus)) + clus.cases = append(clus.cases, + new_Case_SIGTERM_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT(clus)) case "SIGTERM_LEADER": - clus.failures = append(clus.failures, - new_FailureCase_SIGTERM_LEADER(clus)) + clus.cases = append(clus.cases, + new_Case_SIGTERM_LEADER(clus)) case "SIGTERM_LEADER_UNTIL_TRIGGER_SNAPSHOT": - clus.failures = append(clus.failures, - new_FailureCase_SIGTERM_LEADER_UNTIL_TRIGGER_SNAPSHOT(clus)) + clus.cases = append(clus.cases, + new_Case_SIGTERM_LEADER_UNTIL_TRIGGER_SNAPSHOT(clus)) case "SIGTERM_QUORUM": - clus.failures = append(clus.failures, - new_FailureCase_SIGTERM_QUORUM(clus)) + clus.cases = append(clus.cases, + new_Case_SIGTERM_QUORUM(clus)) case "SIGTERM_ALL": - clus.failures = append(clus.failures, - new_FailureCase_SIGTERM_ALL(clus)) + clus.cases = append(clus.cases, + new_Case_SIGTERM_ALL(clus)) case "SIGQUIT_AND_REMOVE_ONE_FOLLOWER": - clus.failures = append(clus.failures, - new_FailureCase_SIGQUIT_AND_REMOVE_ONE_FOLLOWER(clus)) + clus.cases = append(clus.cases, + new_Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER(clus)) case "SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT": - clus.failures = append(clus.failures, - new_FailureCase_SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT(clus)) + clus.cases = append(clus.cases, + new_Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT(clus)) case "SIGQUIT_AND_REMOVE_LEADER": - clus.failures = append(clus.failures, - new_FailureCase_SIGQUIT_AND_REMOVE_LEADER(clus)) + clus.cases = append(clus.cases, + new_Case_SIGQUIT_AND_REMOVE_LEADER(clus)) case "SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT": - clus.failures = append(clus.failures, - new_FailureCase_SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT(clus)) + clus.cases = append(clus.cases, + new_Case_SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT(clus)) + case "SIGQUIT_AND_REMOVE_QUORUM_AND_RESTORE_LEADER_SNAPSHOT_FROM_SCRATCH": + clus.cases = append(clus.cases, + new_Case_SIGQUIT_AND_REMOVE_QUORUM_AND_RESTORE_LEADER_SNAPSHOT_FROM_SCRATCH(clus)) case "BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER": - clus.failures = append(clus.failures, - new_FailureCase_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER(clus)) + clus.cases = append(clus.cases, + new_Case_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER(clus)) case "BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT": - clus.failures = append(clus.failures, - new_FailureCase_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT()) + clus.cases = append(clus.cases, + new_Case_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT()) case "BLACKHOLE_PEER_PORT_TX_RX_LEADER": - clus.failures = append(clus.failures, - 
new_FailureCase_BLACKHOLE_PEER_PORT_TX_RX_LEADER(clus)) + clus.cases = append(clus.cases, + new_Case_BLACKHOLE_PEER_PORT_TX_RX_LEADER(clus)) case "BLACKHOLE_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT": - clus.failures = append(clus.failures, - new_FailureCase_BLACKHOLE_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT()) + clus.cases = append(clus.cases, + new_Case_BLACKHOLE_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT()) case "BLACKHOLE_PEER_PORT_TX_RX_QUORUM": - clus.failures = append(clus.failures, - new_FailureCase_BLACKHOLE_PEER_PORT_TX_RX_QUORUM(clus)) + clus.cases = append(clus.cases, + new_Case_BLACKHOLE_PEER_PORT_TX_RX_QUORUM(clus)) case "BLACKHOLE_PEER_PORT_TX_RX_ALL": - clus.failures = append(clus.failures, - new_FailureCase_BLACKHOLE_PEER_PORT_TX_RX_ALL(clus)) + clus.cases = append(clus.cases, + new_Case_BLACKHOLE_PEER_PORT_TX_RX_ALL(clus)) case "DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER": - clus.failures = append(clus.failures, - new_FailureCase_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER(clus, false)) + clus.cases = append(clus.cases, + new_Case_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER(clus, false)) case "RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER": - clus.failures = append(clus.failures, - new_FailureCase_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER(clus, true)) + clus.cases = append(clus.cases, + new_Case_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER(clus, true)) case "DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT": - clus.failures = append(clus.failures, - new_FailureCase_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT(clus, false)) + clus.cases = append(clus.cases, + new_Case_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT(clus, false)) case "RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT": - clus.failures = append(clus.failures, - new_FailureCase_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT(clus, true)) + clus.cases = append(clus.cases, + new_Case_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT(clus, true)) case "DELAY_PEER_PORT_TX_RX_LEADER": - clus.failures = append(clus.failures, - new_FailureCase_DELAY_PEER_PORT_TX_RX_LEADER(clus, false)) + clus.cases = append(clus.cases, + new_Case_DELAY_PEER_PORT_TX_RX_LEADER(clus, false)) case "RANDOM_DELAY_PEER_PORT_TX_RX_LEADER": - clus.failures = append(clus.failures, - new_FailureCase_DELAY_PEER_PORT_TX_RX_LEADER(clus, true)) + clus.cases = append(clus.cases, + new_Case_DELAY_PEER_PORT_TX_RX_LEADER(clus, true)) case "DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT": - clus.failures = append(clus.failures, - new_FailureCase_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT(clus, false)) + clus.cases = append(clus.cases, + new_Case_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT(clus, false)) case "RANDOM_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT": - clus.failures = append(clus.failures, - new_FailureCase_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT(clus, true)) + clus.cases = append(clus.cases, + new_Case_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT(clus, true)) case "DELAY_PEER_PORT_TX_RX_QUORUM": - clus.failures = append(clus.failures, - new_FailureCase_DELAY_PEER_PORT_TX_RX_QUORUM(clus, false)) + clus.cases = append(clus.cases, + new_Case_DELAY_PEER_PORT_TX_RX_QUORUM(clus, false)) case "RANDOM_DELAY_PEER_PORT_TX_RX_QUORUM": - clus.failures = append(clus.failures, - new_FailureCase_DELAY_PEER_PORT_TX_RX_QUORUM(clus, true)) + clus.cases = append(clus.cases, + new_Case_DELAY_PEER_PORT_TX_RX_QUORUM(clus, true)) case "DELAY_PEER_PORT_TX_RX_ALL": - clus.failures = 
append(clus.failures, - new_FailureCase_DELAY_PEER_PORT_TX_RX_ALL(clus, false)) + clus.cases = append(clus.cases, + new_Case_DELAY_PEER_PORT_TX_RX_ALL(clus, false)) case "RANDOM_DELAY_PEER_PORT_TX_RX_ALL": - clus.failures = append(clus.failures, - new_FailureCase_DELAY_PEER_PORT_TX_RX_ALL(clus, true)) + clus.cases = append(clus.cases, + new_Case_DELAY_PEER_PORT_TX_RX_ALL(clus, true)) case "NO_FAIL_WITH_STRESS": - clus.failures = append(clus.failures, - new_FailureCase_NO_FAIL_WITH_STRESS(clus)) + clus.cases = append(clus.cases, + new_Case_NO_FAIL_WITH_STRESS(clus)) case "NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS": - clus.failures = append(clus.failures, - new_FailureCase_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS(clus)) + clus.cases = append(clus.cases, + new_Case_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS(clus)) case "EXTERNAL": - clus.failures = append(clus.failures, - new_FailureCase_EXTERNAL(clus.Tester.ExternalExecPath)) + clus.cases = append(clus.cases, + new_Case_EXTERNAL(clus.Tester.ExternalExecPath)) case "FAILPOINTS": fpFailures, fperr := failpointFailures(clus) if len(fpFailures) == 0 { clus.lg.Info("no failpoints found!", zap.Error(fperr)) } - clus.failures = append(clus.failures, + clus.cases = append(clus.cases, fpFailures...) } } } -func (clus *Cluster) failureStrings() (fs []string) { - fs = make([]string, len(clus.failures)) - for i := range clus.failures { - fs[i] = clus.failures[i].Desc() +func (clus *Cluster) listCases() (css []string) { + css = make([]string, len(clus.cases)) + for i := range clus.cases { + css[i] = clus.cases[i].Desc() } - return fs + return css } // UpdateDelayLatencyMs updates delay latency with random value @@ -271,26 +283,49 @@ func (clus *Cluster) UpdateDelayLatencyMs() { } } -func (clus *Cluster) updateStresserChecker() { - cs := &compositeStresser{} +func (clus *Cluster) setStresserChecker() { + css := &compositeStresser{} + lss := []*leaseStresser{} + rss := []*runnerStresser{} for _, m := range clus.Members { - cs.stressers = append(cs.stressers, newStresser(clus, m)) + sss := newStresser(clus, m) + css.stressers = append(css.stressers, &compositeStresser{sss}) + for _, s := range sss { + if v, ok := s.(*leaseStresser); ok { + lss = append(lss, v) + clus.lg.Info("added lease stresser", zap.String("endpoint", m.EtcdClientEndpoint)) + } + if v, ok := s.(*runnerStresser); ok { + rss = append(rss, v) + clus.lg.Info("added runner stresser", zap.String("endpoint", m.EtcdClientEndpoint)) + } + } } - clus.stresser = cs + clus.stresser = css + + for _, cs := range clus.Tester.Checkers { + switch cs { + case "KV_HASH": + clus.checkers = append(clus.checkers, newKVHashChecker(clus)) + + case "LEASE_EXPIRE": + for _, ls := range lss { + clus.checkers = append(clus.checkers, newLeaseExpireChecker(ls)) + } + + case "RUNNER": + for _, rs := range rss { + clus.checkers = append(clus.checkers, newRunnerChecker(rs.etcdClientEndpoint, rs.errc)) + } - if clus.Tester.ConsistencyCheck { - clus.checker = newHashChecker(clus.lg, hashAndRevGetter(clus)) - if schk := cs.Checker(); schk != nil { - clus.checker = newCompositeChecker([]Checker{clus.checker, schk}) + case "NO_CHECK": + clus.checkers = append(clus.checkers, newNoChecker()) } - } else { - clus.checker = newNoChecker() } - clus.lg.Info("updated stressers") } -func (clus *Cluster) checkConsistency() (err error) { +func (clus *Cluster) runCheckers(exceptions ...rpcpb.Checker) (err error) { defer func() { if err != nil { return @@ -304,23 +339,37 @@ func (clus *Cluster) checkConsistency() (err error) { } }() - if err =
clus.checker.Check(); err != nil { + exs := make(map[rpcpb.Checker]struct{}) + for _, e := range exceptions { + exs[e] = struct{}{} + } + for _, chk := range clus.checkers { + clus.lg.Warn( + "consistency check START", + zap.String("checker", chk.Type().String()), + zap.Strings("client-endpoints", chk.EtcdClientEndpoints()), + ) + err = chk.Check() clus.lg.Warn( - "consistency check FAIL", - zap.Int("round", clus.rd), - zap.Int("case", clus.cs), + "consistency check END", + zap.String("checker", chk.Type().String()), + zap.Strings("client-endpoints", chk.EtcdClientEndpoints()), zap.Error(err), ) - return err + if err != nil { + _, ok := exs[chk.Type()] + if !ok { + return err + } + clus.lg.Warn( + "consistency check SKIP FAIL", + zap.String("checker", chk.Type().String()), + zap.Strings("client-endpoints", chk.EtcdClientEndpoints()), + zap.Error(err), + ) + } } - clus.lg.Info( - "consistency check ALL PASS", - zap.Int("round", clus.rd), - zap.Int("case", clus.cs), - zap.String("desc", clus.failures[clus.cs].Desc()), - ) - - return err + return nil } // Send_INITIAL_START_ETCD bootstraps etcd cluster the very first time. @@ -390,6 +439,11 @@ func (clus *Cluster) broadcast(op rpcpb.Operation) error { } func (clus *Cluster) sendOp(idx int, op rpcpb.Operation) error { + _, err := clus.sendOpWithResp(idx, op) + return err +} + +func (clus *Cluster) sendOpWithResp(idx int, op rpcpb.Operation) (*rpcpb.Response, error) { // maintain the initial member object // throughout the test time clus.agentRequests[idx] = &rpcpb.Request{ @@ -406,7 +460,7 @@ func (clus *Cluster) sendOp(idx int, op rpcpb.Operation) error { zap.Error(err), ) if err != nil { - return err + return nil, err } resp, err := clus.agentStreams[idx].Recv() @@ -428,18 +482,18 @@ func (clus *Cluster) sendOp(idx int, op rpcpb.Operation) error { ) } if err != nil { - return err + return nil, err } if !resp.Success { - return errors.New(resp.Status) + return nil, errors.New(resp.Status) } m, secure := clus.Members[idx], false for _, cu := range m.Etcd.AdvertiseClientURLs { u, err := url.Parse(cu) if err != nil { - return err + return nil, err } if u.Scheme == "https" { // TODO: handle unix secure = true @@ -455,16 +509,16 @@ func (clus *Cluster) sendOp(idx int, op rpcpb.Operation) error { "client", ) if err = fileutil.TouchDirAll(dirClient); err != nil { - return err + return nil, err } clientCertData := []byte(resp.Member.ClientCertData) if len(clientCertData) == 0 { - return fmt.Errorf("got empty client cert from %q", m.EtcdClientEndpoint) + return nil, fmt.Errorf("got empty client cert from %q", m.EtcdClientEndpoint) } clientCertPath := filepath.Join(dirClient, "cert.pem") if err = ioutil.WriteFile(clientCertPath, clientCertData, 0644); err != nil { // overwrite if exists - return err + return nil, err } resp.Member.ClientCertPath = clientCertPath clus.lg.Info( @@ -474,11 +528,11 @@ func (clus *Cluster) sendOp(idx int, op rpcpb.Operation) error { clientKeyData := []byte(resp.Member.ClientKeyData) if len(clientKeyData) == 0 { - return fmt.Errorf("got empty client key from %q", m.EtcdClientEndpoint) + return nil, fmt.Errorf("got empty client key from %q", m.EtcdClientEndpoint) } clientKeyPath := filepath.Join(dirClient, "key.pem") if err = ioutil.WriteFile(clientKeyPath, clientKeyData, 0644); err != nil { // overwrite if exists - return err + return nil, err } resp.Member.ClientKeyPath = clientKeyPath clus.lg.Info( @@ -491,7 +545,7 @@ func (clus *Cluster) sendOp(idx int, op rpcpb.Operation) error { // TODO: disable this when auto TLS is 
deprecated clientTrustedCAPath := filepath.Join(dirClient, "ca.pem") if err = ioutil.WriteFile(clientTrustedCAPath, clientTrustedCAData, 0644); err != nil { // overwrite if exists - return err + return nil, err } resp.Member.ClientTrustedCAPath = clientTrustedCAPath clus.lg.Info( @@ -504,7 +558,8 @@ func (clus *Cluster) sendOp(idx int, op rpcpb.Operation) error { clus.Members[idx] = resp.Member } - return nil + + return resp, nil } // Send_SIGQUIT_ETCD_AND_REMOVE_DATA_AND_STOP_AGENT terminates all tester connections to agents and etcd servers. @@ -690,14 +745,14 @@ func (clus *Cluster) defrag() error { "defrag ALL PASS", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), ) return nil } -// GetFailureDelayDuration computes failure delay duration. -func (clus *Cluster) GetFailureDelayDuration() time.Duration { - return time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond +// GetCaseDelayDuration computes failure delay duration. +func (clus *Cluster) GetCaseDelayDuration() time.Duration { + return time.Duration(clus.Tester.CaseDelayMs) * time.Millisecond } // Report reports the number of modified keys. diff --git a/functional/tester/cluster_read_config.go b/functional/tester/cluster_read_config.go index d5c2ff2ab3a..223265e66c8 100644 --- a/functional/tester/cluster_read_config.go +++ b/functional/tester/cluster_read_config.go @@ -320,8 +320,8 @@ func read(lg *zap.Logger, fpath string) (*Cluster, error) { } } - if len(clus.Tester.FailureCases) == 0 { - return nil, errors.New("FailureCases not found") + if len(clus.Tester.Cases) == 0 { + return nil, errors.New("Cases not found") } if clus.Tester.DelayLatencyMs <= clus.Tester.DelayLatencyMsRv*5 { return nil, fmt.Errorf("delay latency %d ms must be greater than 5x of delay latency random variable %d ms", clus.Tester.DelayLatencyMs, clus.Tester.DelayLatencyMsRv) @@ -330,15 +330,20 @@ func read(lg *zap.Logger, fpath string) (*Cluster, error) { clus.Tester.UpdatedDelayLatencyMs = clus.Tester.DelayLatencyMs } - for _, v := range clus.Tester.FailureCases { - if _, ok := rpcpb.FailureCase_value[v]; !ok { - return nil, fmt.Errorf("%q is not defined in 'rpcpb.FailureCase_value'", v) + for _, v := range clus.Tester.Cases { + if _, ok := rpcpb.Case_value[v]; !ok { + return nil, fmt.Errorf("%q is not defined in 'rpcpb.Case_value'", v) } } - for _, v := range clus.Tester.StressTypes { - if _, ok := rpcpb.StressType_value[v]; !ok { - return nil, fmt.Errorf("StressType is unknown; got %q", v) + for _, v := range clus.Tester.Stressers { + if _, ok := rpcpb.Stresser_value[v]; !ok { + return nil, fmt.Errorf("Stresser is unknown; got %q", v) + } + } + for _, v := range clus.Tester.Checkers { + if _, ok := rpcpb.Checker_value[v]; !ok { + return nil, fmt.Errorf("Checker is unknown; got %q", v) } } diff --git a/functional/tester/cluster_run.go b/functional/tester/cluster_run.go index 9185cad0b3d..6dd0021062d 100644 --- a/functional/tester/cluster_run.go +++ b/functional/tester/cluster_run.go @@ -51,7 +51,7 @@ func (clus *Cluster) Run() { "round FAIL", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), zap.Error(err), ) if clus.cleanup() != nil { @@ -75,7 +75,7 @@ func (clus *Cluster) Run() { "compact START", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), zap.Duration("timeout", timeout), ) if 
err := clus.compact(revToCompact, timeout); err != nil { @@ -83,7 +83,7 @@ func (clus *Cluster) Run() { "compact FAIL", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), zap.Error(err), ) if err = clus.cleanup(); err != nil { @@ -91,7 +91,7 @@ func (clus *Cluster) Run() { "cleanup FAIL", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), zap.Error(err), ) return @@ -111,13 +111,13 @@ func (clus *Cluster) Run() { "functional-tester PASS", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), ) } func (clus *Cluster) doRound() error { - if clus.Tester.FailureShuffle { - clus.shuffleFailures() + if clus.Tester.CaseShuffle { + clus.shuffleCases() } roundNow := time.Now() @@ -125,10 +125,10 @@ func (clus *Cluster) doRound() error { "round START", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), - zap.Strings("failures", clus.failureStrings()), + zap.Int("case-total", len(clus.cases)), + zap.Strings("cases", clus.listCases()), ) - for i, fa := range clus.failures { + for i, fa := range clus.cases { clus.cs = i caseTotal[fa.Desc()]++ @@ -139,7 +139,7 @@ func (clus *Cluster) doRound() error { "case START", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), zap.String("desc", fa.Desc()), ) @@ -149,13 +149,13 @@ func (clus *Cluster) doRound() error { } stressStarted := false - fcase := fa.FailureCase() - if fcase != rpcpb.FailureCase_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS { + fcase := fa.TestCase() + if fcase != rpcpb.Case_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS { clus.lg.Info( "stress START", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), zap.String("desc", fa.Desc()), ) if err := clus.stresser.Stress(); err != nil { @@ -168,7 +168,7 @@ func (clus *Cluster) doRound() error { "inject START", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), zap.String("desc", fa.Desc()), ) if err := fa.Inject(clus); err != nil { @@ -182,7 +182,7 @@ func (clus *Cluster) doRound() error { "recover START", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), zap.String("desc", fa.Desc()), ) if err := fa.Recover(clus); err != nil { @@ -194,11 +194,11 @@ func (clus *Cluster) doRound() error { "stress PAUSE", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), zap.String("desc", fa.Desc()), ) ems := clus.stresser.Pause() - if fcase == rpcpb.FailureCase_NO_FAIL_WITH_STRESS && len(ems) > 0 { + if fcase == rpcpb.Case_NO_FAIL_WITH_STRESS && len(ems) > 0 { ess := make([]string, 0, len(ems)) cnt := 0 for k, v := range ems { @@ -223,29 +223,35 @@ func (clus *Cluster) doRound() error { "health check START", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), zap.String("desc", fa.Desc()), ) if err := clus.WaitHealth(); err != nil { return fmt.Errorf("wait full health error: %v", err) } + 
checkerFailExceptions := []rpcpb.Checker{} + switch fcase { + case rpcpb.Case_SIGQUIT_AND_REMOVE_QUORUM_AND_RESTORE_LEADER_SNAPSHOT_FROM_SCRATCH: + // TODO: restore from snapshot + checkerFailExceptions = append(checkerFailExceptions, rpcpb.Checker_LEASE_EXPIRE) + } + clus.lg.Info( "consistency check START", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), zap.String("desc", fa.Desc()), ) - if err := clus.checkConsistency(); err != nil { + if err := clus.runCheckers(checkerFailExceptions...); err != nil { return fmt.Errorf("consistency check error (%v)", err) } - clus.lg.Info( - "case PASS", + "consistency check PASS", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), zap.String("desc", fa.Desc()), zap.Duration("took", time.Since(caseNow)), ) @@ -254,8 +260,8 @@ func (clus *Cluster) doRound() error { clus.lg.Info( "round ALL PASS", zap.Int("round", clus.rd), - zap.Strings("failures", clus.failureStrings()), - zap.Int("case-total", len(clus.failures)), + zap.Strings("cases", clus.listCases()), + zap.Int("case-total", len(clus.cases)), zap.Duration("took", time.Since(roundNow)), ) return nil @@ -314,7 +320,7 @@ func (clus *Cluster) failed() { "functional-tester FAIL", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), ) clus.Send_SIGQUIT_ETCD_AND_REMOVE_DATA_AND_STOP_AGENT() @@ -322,14 +328,14 @@ func (clus *Cluster) failed() { } func (clus *Cluster) cleanup() error { - if clus.Tester.ExitOnFailure { + if clus.Tester.ExitOnCaseFail { defer clus.failed() } roundFailedTotalCounter.Inc() desc := "compact/defrag" if clus.cs != -1 { - desc = clus.failures[clus.cs].Desc() + desc = clus.cases[clus.cs].Desc() } caseFailedTotalCounter.WithLabelValues(desc).Inc() @@ -337,7 +343,7 @@ func (clus *Cluster) cleanup() error { "closing stressers before archiving failure data", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), ) clus.stresser.Close() @@ -346,7 +352,7 @@ func (clus *Cluster) cleanup() error { "cleanup FAIL", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), zap.Error(err), ) return err @@ -356,12 +362,12 @@ func (clus *Cluster) cleanup() error { "restart FAIL", zap.Int("round", clus.rd), zap.Int("case", clus.cs), - zap.Int("case-total", len(clus.failures)), + zap.Int("case-total", len(clus.cases)), zap.Error(err), ) return err } - clus.updateStresserChecker() + clus.setStresserChecker() return nil } diff --git a/functional/tester/cluster_shuffle.go b/functional/tester/cluster_shuffle.go index 2eaf2737b75..16c79b2f6e2 100644 --- a/functional/tester/cluster_shuffle.go +++ b/functional/tester/cluster_shuffle.go @@ -21,17 +21,17 @@ import ( "go.uber.org/zap" ) -func (clus *Cluster) shuffleFailures() { +func (clus *Cluster) shuffleCases() { rand.Seed(time.Now().UnixNano()) offset := rand.Intn(1000) - n := len(clus.failures) + n := len(clus.cases) cp := coprime(n) - fs := make([]Failure, n) + css := make([]Case, n) for i := 0; i < n; i++ { - fs[i] = clus.failures[(cp*i+offset)%n] + css[i] = clus.cases[(cp*i+offset)%n] } - clus.failures = fs + clus.cases = css clus.lg.Info("shuffled test failure cases", zap.Int("total", n)) } diff --git 
a/functional/tester/cluster_test.go b/functional/tester/cluster_test.go index b4c1f136015..dccc3630702 100644 --- a/functional/tester/cluster_test.go +++ b/functional/tester/cluster_test.go @@ -31,15 +31,15 @@ func Test_read(t *testing.T) { EtcdExecPath: "./bin/etcd", AgentAddr: "127.0.0.1:19027", FailpointHTTPAddr: "http://127.0.0.1:7381", - BaseDir: "/tmp/etcd-agent-data-1", - EtcdLogPath: "/tmp/etcd-agent-data-1/current-etcd.log", + BaseDir: "/tmp/etcd-functional-1", + EtcdLogPath: "/tmp/etcd-functional-1/etcd.log", EtcdClientProxy: false, EtcdPeerProxy: true, EtcdClientEndpoint: "127.0.0.1:1379", Etcd: &rpcpb.Etcd{ Name: "s1", - DataDir: "/tmp/etcd-agent-data-1/etcd.data", - WALDir: "/tmp/etcd-agent-data-1/etcd.data/member/wal", + DataDir: "/tmp/etcd-functional-1/etcd.data", + WALDir: "/tmp/etcd-functional-1/etcd.data/member/wal", HeartbeatIntervalMs: 100, ElectionTimeoutMs: 1000, ListenClientURLs: []string{"https://127.0.0.1:1379"}, @@ -64,20 +64,33 @@ func Test_read(t *testing.T) { PreVote: true, InitialCorruptCheck: true, }, + ClientCertData: "", + ClientCertPath: "", + ClientKeyData: "", + ClientKeyPath: "", + ClientTrustedCAData: "", + ClientTrustedCAPath: "", + PeerCertData: "", + PeerCertPath: "", + PeerKeyData: "", + PeerKeyPath: "", + PeerTrustedCAData: "", + PeerTrustedCAPath: "", + SnapshotPath: "/tmp/etcd-functional-1.snapshot.db", }, { EtcdExecPath: "./bin/etcd", AgentAddr: "127.0.0.1:29027", FailpointHTTPAddr: "http://127.0.0.1:7382", - BaseDir: "/tmp/etcd-agent-data-2", - EtcdLogPath: "/tmp/etcd-agent-data-2/current-etcd.log", + BaseDir: "/tmp/etcd-functional-2", + EtcdLogPath: "/tmp/etcd-functional-2/etcd.log", EtcdClientProxy: false, EtcdPeerProxy: true, EtcdClientEndpoint: "127.0.0.1:2379", Etcd: &rpcpb.Etcd{ Name: "s2", - DataDir: "/tmp/etcd-agent-data-2/etcd.data", - WALDir: "/tmp/etcd-agent-data-2/etcd.data/member/wal", + DataDir: "/tmp/etcd-functional-2/etcd.data", + WALDir: "/tmp/etcd-functional-2/etcd.data/member/wal", HeartbeatIntervalMs: 100, ElectionTimeoutMs: 1000, ListenClientURLs: []string{"https://127.0.0.1:2379"}, @@ -102,20 +115,33 @@ func Test_read(t *testing.T) { PreVote: true, InitialCorruptCheck: true, }, + ClientCertData: "", + ClientCertPath: "", + ClientKeyData: "", + ClientKeyPath: "", + ClientTrustedCAData: "", + ClientTrustedCAPath: "", + PeerCertData: "", + PeerCertPath: "", + PeerKeyData: "", + PeerKeyPath: "", + PeerTrustedCAData: "", + PeerTrustedCAPath: "", + SnapshotPath: "/tmp/etcd-functional-2.snapshot.db", }, { EtcdExecPath: "./bin/etcd", AgentAddr: "127.0.0.1:39027", FailpointHTTPAddr: "http://127.0.0.1:7383", - BaseDir: "/tmp/etcd-agent-data-3", - EtcdLogPath: "/tmp/etcd-agent-data-3/current-etcd.log", + BaseDir: "/tmp/etcd-functional-3", + EtcdLogPath: "/tmp/etcd-functional-3/etcd.log", EtcdClientProxy: false, EtcdPeerProxy: true, EtcdClientEndpoint: "127.0.0.1:3379", Etcd: &rpcpb.Etcd{ Name: "s3", - DataDir: "/tmp/etcd-agent-data-3/etcd.data", - WALDir: "/tmp/etcd-agent-data-3/etcd.data/member/wal", + DataDir: "/tmp/etcd-functional-3/etcd.data", + WALDir: "/tmp/etcd-functional-3/etcd.data/member/wal", HeartbeatIntervalMs: 100, ElectionTimeoutMs: 1000, ListenClientURLs: []string{"https://127.0.0.1:3379"}, @@ -140,6 +166,19 @@ func Test_read(t *testing.T) { PreVote: true, InitialCorruptCheck: true, }, + ClientCertData: "", + ClientCertPath: "", + ClientKeyData: "", + ClientKeyPath: "", + ClientTrustedCAData: "", + ClientTrustedCAPath: "", + PeerCertData: "", + PeerCertPath: "", + PeerKeyData: "", + PeerKeyPath: "", + 
PeerTrustedCAData: "", + PeerTrustedCAPath: "", + SnapshotPath: "/tmp/etcd-functional-3.snapshot.db", }, }, Tester: &rpcpb.Tester{ @@ -150,12 +189,11 @@ func Test_read(t *testing.T) { DelayLatencyMsRv: 500, UpdatedDelayLatencyMs: 5000, RoundLimit: 1, - ExitOnFailure: true, - ConsistencyCheck: true, + ExitOnCaseFail: true, EnablePprof: true, - FailureDelayMs: 7000, - FailureShuffle: true, - FailureCases: []string{ + CaseDelayMs: 7000, + CaseShuffle: true, + Cases: []string{ "SIGTERM_ONE_FOLLOWER", "SIGTERM_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT", "SIGTERM_LEADER", @@ -166,6 +204,7 @@ func Test_read(t *testing.T) { "SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT", // "SIGQUIT_AND_REMOVE_LEADER", // "SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT", + "SIGQUIT_AND_REMOVE_QUORUM_AND_RESTORE_LEADER_SNAPSHOT_FROM_SCRATCH", "BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER", "BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT", "BLACKHOLE_PEER_PORT_TX_RX_LEADER", @@ -190,7 +229,8 @@ func Test_read(t *testing.T) { FailpointCommands: []string{`panic("etcd-tester")`}, RunnerExecPath: "./bin/etcd-runner", ExternalExecPath: "", - StressTypes: []string{"KV", "LEASE"}, + Stressers: []string{"KV", "LEASE"}, + Checkers: []string{"KV_HASH", "LEASE_EXPIRE"}, StressKeySize: 100, StressKeySizeLarge: 32769, StressKeySuffixRange: 250000, @@ -219,17 +259,17 @@ func Test_read(t *testing.T) { cfg.lg = logger - cfg.updateFailures() - fs1 := cfg.failureStrings() + cfg.updateCases() + fs1 := cfg.listCases() - cfg.shuffleFailures() - fs2 := cfg.failureStrings() + cfg.shuffleCases() + fs2 := cfg.listCases() if reflect.DeepEqual(fs1, fs2) { t.Fatalf("expected shuffled failure cases, got %q", fs2) } - cfg.shuffleFailures() - fs3 := cfg.failureStrings() + cfg.shuffleCases() + fs3 := cfg.listCases() if reflect.DeepEqual(fs2, fs3) { t.Fatalf("expected reshuffled failure cases from %q, got %q", fs2, fs3) } diff --git a/functional/tester/failure.go b/functional/tester/failure.go deleted file mode 100644 index 2f84959a7d1..00000000000 --- a/functional/tester/failure.go +++ /dev/null @@ -1,317 +0,0 @@ -// Copyright 2018 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tester - -import ( - "fmt" - "math/rand" - "time" - - "github.com/coreos/etcd/functional/rpcpb" - - "go.uber.org/zap" -) - -// Failure defines failure injection interface. -// To add a fail case: -// 1. implement "Failure" interface -// 2. define fail case name in "rpcpb.FailureCase" -type Failure interface { - // Inject injeccts the failure into the testing cluster at the given - // round. When calling the function, the cluster should be in health. - Inject(clus *Cluster) error - // Recover recovers the injected failure caused by the injection of the - // given round and wait for the recovery of the testing cluster. - Recover(clus *Cluster) error - // Desc returns a description of the failure - Desc() string - // FailureCase returns "rpcpb.FailureCase" enum type. 
- FailureCase() rpcpb.FailureCase -} - -type injectMemberFunc func(*Cluster, int) error -type recoverMemberFunc func(*Cluster, int) error - -type failureByFunc struct { - desc string - failureCase rpcpb.FailureCase - injectMember injectMemberFunc - recoverMember recoverMemberFunc -} - -func (f *failureByFunc) Desc() string { - if f.desc != "" { - return f.desc - } - return f.failureCase.String() -} - -func (f *failureByFunc) FailureCase() rpcpb.FailureCase { - return f.failureCase -} - -type failureFollower struct { - failureByFunc - last int - lead int -} - -func (f *failureFollower) updateIndex(clus *Cluster) error { - idx, err := clus.GetLeader() - if err != nil { - return err - } - f.lead = idx - - n := len(clus.Members) - if f.last == -1 { // first run - f.last = clus.rd % n - if f.last == f.lead { - f.last = (f.last + 1) % n - } - } else { - f.last = (f.last + 1) % n - if f.last == f.lead { - f.last = (f.last + 1) % n - } - } - return nil -} - -func (f *failureFollower) Inject(clus *Cluster) error { - if err := f.updateIndex(clus); err != nil { - return err - } - return f.injectMember(clus, f.last) -} - -func (f *failureFollower) Recover(clus *Cluster) error { - return f.recoverMember(clus, f.last) -} - -func (f *failureFollower) Desc() string { - if f.desc != "" { - return f.desc - } - return f.failureCase.String() -} - -func (f *failureFollower) FailureCase() rpcpb.FailureCase { - return f.failureCase -} - -type failureLeader struct { - failureByFunc - last int - lead int -} - -func (f *failureLeader) updateIndex(clus *Cluster) error { - idx, err := clus.GetLeader() - if err != nil { - return err - } - f.lead = idx - f.last = idx - return nil -} - -func (f *failureLeader) Inject(clus *Cluster) error { - if err := f.updateIndex(clus); err != nil { - return err - } - return f.injectMember(clus, f.last) -} - -func (f *failureLeader) Recover(clus *Cluster) error { - return f.recoverMember(clus, f.last) -} - -func (f *failureLeader) FailureCase() rpcpb.FailureCase { - return f.failureCase -} - -type failureQuorum failureByFunc - -func (f *failureQuorum) Inject(clus *Cluster) error { - for i := range killMap(len(clus.Members), clus.rd) { - if err := f.injectMember(clus, i); err != nil { - return err - } - } - return nil -} - -func (f *failureQuorum) Recover(clus *Cluster) error { - for i := range killMap(len(clus.Members), clus.rd) { - if err := f.recoverMember(clus, i); err != nil { - return err - } - } - return nil -} - -func (f *failureQuorum) Desc() string { - if f.desc != "" { - return f.desc - } - return f.failureCase.String() -} - -func (f *failureQuorum) FailureCase() rpcpb.FailureCase { - return f.failureCase -} - -func killMap(size int, seed int) map[int]bool { - m := make(map[int]bool) - r := rand.New(rand.NewSource(int64(seed))) - majority := size/2 + 1 - for { - m[r.Intn(size)] = true - if len(m) >= majority { - return m - } - } -} - -type failureAll failureByFunc - -func (f *failureAll) Inject(clus *Cluster) error { - for i := range clus.Members { - if err := f.injectMember(clus, i); err != nil { - return err - } - } - return nil -} - -func (f *failureAll) Recover(clus *Cluster) error { - for i := range clus.Members { - if err := f.recoverMember(clus, i); err != nil { - return err - } - } - return nil -} - -func (f *failureAll) Desc() string { - if f.desc != "" { - return f.desc - } - return f.failureCase.String() -} - -func (f *failureAll) FailureCase() rpcpb.FailureCase { - return f.failureCase -} - -// failureUntilSnapshot injects a failure and waits for a snapshot event 
-type failureUntilSnapshot struct { - desc string - failureCase rpcpb.FailureCase - Failure -} - -// all delay failure cases except the ones failing with latency -// greater than election timeout (trigger leader election and -// cluster keeps operating anyways) -var slowCases = map[rpcpb.FailureCase]bool{ - rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER: true, - rpcpb.FailureCase_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT: true, - rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT: true, - rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_LEADER: true, - rpcpb.FailureCase_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT: true, - rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT: true, - rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_QUORUM: true, - rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_ALL: true, -} - -func (f *failureUntilSnapshot) Inject(clus *Cluster) error { - if err := f.Failure.Inject(clus); err != nil { - return err - } - - snapshotCount := clus.Members[0].Etcd.SnapshotCount - - now := time.Now() - clus.lg.Info( - "trigger snapshot START", - zap.String("desc", f.Desc()), - zap.Int64("etcd-snapshot-count", snapshotCount), - ) - - // maxRev may fail since failure just injected, retry if failed. - startRev, err := clus.maxRev() - for i := 0; i < 10 && startRev == 0; i++ { - startRev, err = clus.maxRev() - } - if startRev == 0 { - return err - } - lastRev := startRev - - // healthy cluster could accept 1000 req/sec at least. - // 3x time to trigger snapshot. - retries := int(snapshotCount) / 1000 * 3 - if v, ok := slowCases[f.FailureCase()]; v && ok { - // slow network takes more retries - retries *= 5 - } - - for i := 0; i < retries; i++ { - lastRev, _ = clus.maxRev() - // If the number of proposals committed is bigger than snapshot count, - // a new snapshot should have been created. - diff := lastRev - startRev - if diff > snapshotCount { - clus.lg.Info( - "trigger snapshot PASS", - zap.Int("retries", i), - zap.String("desc", f.Desc()), - zap.Int64("committed-entries", diff), - zap.Int64("etcd-snapshot-count", snapshotCount), - zap.Int64("last-revision", lastRev), - zap.Duration("took", time.Since(now)), - ) - return nil - } - - clus.lg.Info( - "trigger snapshot PROGRESS", - zap.Int("retries", i), - zap.Int64("committed-entries", diff), - zap.Int64("etcd-snapshot-count", snapshotCount), - zap.Int64("last-revision", lastRev), - zap.Duration("took", time.Since(now)), - ) - time.Sleep(time.Second) - } - - return fmt.Errorf("cluster too slow: only %d commits in %d retries", lastRev-startRev, retries) -} - -func (f *failureUntilSnapshot) Desc() string { - if f.desc != "" { - return f.desc - } - if f.failureCase.String() != "" { - return f.failureCase.String() - } - return f.Failure.Desc() -} - -func (f *failureUntilSnapshot) FailureCase() rpcpb.FailureCase { - return f.failureCase -} diff --git a/functional/tester/failure_case_network_blackhole.go b/functional/tester/failure_case_network_blackhole.go deleted file mode 100644 index b75394a788f..00000000000 --- a/functional/tester/failure_case_network_blackhole.go +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2018 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
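
Two details of `failureUntilSnapshot.Inject` above are worth calling out: the retry budget comes from integer division, so a snapshot count below 1000 yields zero retries and an immediate "cluster too slow" failure, and the budget is multiplied by five for the `slowCases` delay faults. A compilable sketch of the arithmetic (the 10,000 snapshot count is an assumption for illustration):

```go
package main

import "fmt"

// snapshotRetryBudget mirrors the arithmetic in failureUntilSnapshot.Inject:
// a healthy cluster is assumed to commit at least ~1000 proposals/sec, the
// budget allows 3x that time, and slow network faults get 5x more polls.
func snapshotRetryBudget(snapshotCount int64, slow bool) int {
	retries := int(snapshotCount) / 1000 * 3 // integer division
	if slow {
		retries *= 5
	}
	return retries
}

func main() {
	fmt.Println(snapshotRetryBudget(10000, false)) // 30 one-second polls
	fmt.Println(snapshotRetryBudget(10000, true))  // 150 for slowCases faults
	fmt.Println(snapshotRetryBudget(500, false))   // 0 -- fails immediately
}
```

The deleted `failure_case_network_blackhole.go` continues below.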
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tester - -import "github.com/coreos/etcd/functional/rpcpb" - -func inject_BLACKHOLE_PEER_PORT_TX_RX(clus *Cluster, idx int) error { - return clus.sendOp(idx, rpcpb.Operation_BLACKHOLE_PEER_PORT_TX_RX) -} - -func recover_BLACKHOLE_PEER_PORT_TX_RX(clus *Cluster, idx int) error { - return clus.sendOp(idx, rpcpb.Operation_UNBLACKHOLE_PEER_PORT_TX_RX) -} - -func new_FailureCase_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER(clus *Cluster) Failure { - ff := failureByFunc{ - failureCase: rpcpb.FailureCase_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER, - injectMember: inject_BLACKHOLE_PEER_PORT_TX_RX, - recoverMember: recover_BLACKHOLE_PEER_PORT_TX_RX, - } - f := &failureFollower{ff, -1, -1} - return &failureDelay{ - Failure: f, - delayDuration: clus.GetFailureDelayDuration(), - } -} - -func new_FailureCase_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT() Failure { - ff := failureByFunc{ - failureCase: rpcpb.FailureCase_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT, - injectMember: inject_BLACKHOLE_PEER_PORT_TX_RX, - recoverMember: recover_BLACKHOLE_PEER_PORT_TX_RX, - } - f := &failureFollower{ff, -1, -1} - return &failureUntilSnapshot{ - failureCase: rpcpb.FailureCase_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT, - Failure: f, - } -} - -func new_FailureCase_BLACKHOLE_PEER_PORT_TX_RX_LEADER(clus *Cluster) Failure { - ff := failureByFunc{ - failureCase: rpcpb.FailureCase_BLACKHOLE_PEER_PORT_TX_RX_LEADER, - injectMember: inject_BLACKHOLE_PEER_PORT_TX_RX, - recoverMember: recover_BLACKHOLE_PEER_PORT_TX_RX, - } - f := &failureLeader{ff, -1, -1} - return &failureDelay{ - Failure: f, - delayDuration: clus.GetFailureDelayDuration(), - } -} - -func new_FailureCase_BLACKHOLE_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT() Failure { - ff := failureByFunc{ - failureCase: rpcpb.FailureCase_BLACKHOLE_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT, - injectMember: inject_BLACKHOLE_PEER_PORT_TX_RX, - recoverMember: recover_BLACKHOLE_PEER_PORT_TX_RX, - } - f := &failureLeader{ff, -1, -1} - return &failureUntilSnapshot{ - failureCase: rpcpb.FailureCase_BLACKHOLE_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT, - Failure: f, - } -} - -func new_FailureCase_BLACKHOLE_PEER_PORT_TX_RX_QUORUM(clus *Cluster) Failure { - f := &failureQuorum{ - failureCase: rpcpb.FailureCase_BLACKHOLE_PEER_PORT_TX_RX_QUORUM, - injectMember: inject_BLACKHOLE_PEER_PORT_TX_RX, - recoverMember: recover_BLACKHOLE_PEER_PORT_TX_RX, - } - return &failureDelay{ - Failure: f, - delayDuration: clus.GetFailureDelayDuration(), - } -} - -func new_FailureCase_BLACKHOLE_PEER_PORT_TX_RX_ALL(clus *Cluster) Failure { - f := &failureAll{ - failureCase: rpcpb.FailureCase_BLACKHOLE_PEER_PORT_TX_RX_ALL, - injectMember: inject_BLACKHOLE_PEER_PORT_TX_RX, - recoverMember: recover_BLACKHOLE_PEER_PORT_TX_RX, - } - return &failureDelay{ - Failure: f, - delayDuration: clus.GetFailureDelayDuration(), - } -} diff --git a/functional/tester/failure_case_no_fail.go b/functional/tester/failure_case_no_fail.go deleted file mode 100644 index 4ae91ea1721..00000000000 --- a/functional/tester/failure_case_no_fail.go +++ 
/dev/null @@ -1,99 +0,0 @@ -// Copyright 2018 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tester - -import ( - "time" - - "github.com/coreos/etcd/functional/rpcpb" - - "go.uber.org/zap" -) - -type failureNoFailWithStress failureByFunc - -func (f *failureNoFailWithStress) Inject(clus *Cluster) error { - return nil -} - -func (f *failureNoFailWithStress) Recover(clus *Cluster) error { - return nil -} - -func (f *failureNoFailWithStress) Desc() string { - if f.desc != "" { - return f.desc - } - return f.failureCase.String() -} - -func (f *failureNoFailWithStress) FailureCase() rpcpb.FailureCase { - return f.failureCase -} - -func new_FailureCase_NO_FAIL_WITH_STRESS(clus *Cluster) Failure { - f := &failureNoFailWithStress{ - failureCase: rpcpb.FailureCase_NO_FAIL_WITH_STRESS, - } - return &failureDelay{ - Failure: f, - delayDuration: clus.GetFailureDelayDuration(), - } -} - -type failureNoFailWithNoStressForLiveness failureByFunc - -func (f *failureNoFailWithNoStressForLiveness) Inject(clus *Cluster) error { - clus.lg.Info( - "extra delay for liveness mode with no stresser", - zap.Int("round", clus.rd), - zap.Int("case", clus.cs), - zap.String("desc", f.Desc()), - ) - time.Sleep(clus.GetFailureDelayDuration()) - - clus.lg.Info( - "wait health in liveness mode", - zap.Int("round", clus.rd), - zap.Int("case", clus.cs), - zap.String("desc", f.Desc()), - ) - return clus.WaitHealth() -} - -func (f *failureNoFailWithNoStressForLiveness) Recover(clus *Cluster) error { - return nil -} - -func (f *failureNoFailWithNoStressForLiveness) Desc() string { - if f.desc != "" { - return f.desc - } - return f.failureCase.String() -} - -func (f *failureNoFailWithNoStressForLiveness) FailureCase() rpcpb.FailureCase { - return f.failureCase -} - -func new_FailureCase_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS(clus *Cluster) Failure { - f := &failureNoFailWithNoStressForLiveness{ - failureCase: rpcpb.FailureCase_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS, - } - return &failureDelay{ - Failure: f, - delayDuration: clus.GetFailureDelayDuration(), - } -} diff --git a/functional/tester/failure_case_sigterm.go b/functional/tester/failure_case_sigterm.go deleted file mode 100644 index 78afec006a9..00000000000 --- a/functional/tester/failure_case_sigterm.go +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2018 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
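
Every constructor in these deleted `failure_case_*.go` files wraps its failure in `failureDelay` with `clus.GetFailureDelayDuration()`. `failureDelay` itself is outside this diff, so the `Inject` body below is a hypothetical sketch of the behavior its call sites imply (inject the fault, then hold it for the configured window before the tester recovers); the stand-in `Cluster` and `Failure` types are likewise reduced to what the sketch needs:

```go
package main

import (
	"fmt"
	"time"
)

// Stand-ins, trimmed to what this sketch needs.
type Cluster struct{}

type Failure interface {
	Inject(clus *Cluster) error
	Recover(clus *Cluster) error
}

// failureDelay decorates another Failure. Only the struct shape is visible
// in the hunks above; this Inject body is an assumption, not the real code.
type failureDelay struct {
	Failure
	delayDuration time.Duration
}

func (f *failureDelay) Inject(clus *Cluster) error {
	if err := f.Failure.Inject(clus); err != nil {
		return err
	}
	time.Sleep(f.delayDuration) // keep the fault in place for the delay window
	return nil
}

type noopFailure struct{}

func (noopFailure) Inject(*Cluster) error  { return nil }
func (noopFailure) Recover(*Cluster) error { return nil }

func main() {
	f := &failureDelay{Failure: noopFailure{}, delayDuration: 10 * time.Millisecond}
	fmt.Println(f.Inject(&Cluster{})) // <nil>, after the 10ms hold
}
```

The rest of the deleted `failure_case_sigterm.go` follows.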
- -package tester - -import "github.com/coreos/etcd/functional/rpcpb" - -func inject_SIGTERM_ETCD(clus *Cluster, idx int) error { - return clus.sendOp(idx, rpcpb.Operation_SIGTERM_ETCD) -} - -func recover_SIGTERM_ETCD(clus *Cluster, idx int) error { - return clus.sendOp(idx, rpcpb.Operation_RESTART_ETCD) -} - -func new_FailureCase_SIGTERM_ONE_FOLLOWER(clus *Cluster) Failure { - ff := failureByFunc{ - failureCase: rpcpb.FailureCase_SIGTERM_ONE_FOLLOWER, - injectMember: inject_SIGTERM_ETCD, - recoverMember: recover_SIGTERM_ETCD, - } - f := &failureFollower{ff, -1, -1} - return &failureDelay{ - Failure: f, - delayDuration: clus.GetFailureDelayDuration(), - } -} - -func new_FailureCase_SIGTERM_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT(clus *Cluster) Failure { - return &failureUntilSnapshot{ - failureCase: rpcpb.FailureCase_SIGTERM_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT, - Failure: new_FailureCase_SIGTERM_ONE_FOLLOWER(clus), - } -} - -func new_FailureCase_SIGTERM_LEADER(clus *Cluster) Failure { - ff := failureByFunc{ - failureCase: rpcpb.FailureCase_SIGTERM_LEADER, - injectMember: inject_SIGTERM_ETCD, - recoverMember: recover_SIGTERM_ETCD, - } - f := &failureLeader{ff, -1, -1} - return &failureDelay{ - Failure: f, - delayDuration: clus.GetFailureDelayDuration(), - } -} - -func new_FailureCase_SIGTERM_LEADER_UNTIL_TRIGGER_SNAPSHOT(clus *Cluster) Failure { - return &failureUntilSnapshot{ - failureCase: rpcpb.FailureCase_SIGTERM_LEADER_UNTIL_TRIGGER_SNAPSHOT, - Failure: new_FailureCase_SIGTERM_LEADER(clus), - } -} - -func new_FailureCase_SIGTERM_QUORUM(clus *Cluster) Failure { - f := &failureQuorum{ - failureCase: rpcpb.FailureCase_SIGTERM_QUORUM, - injectMember: inject_SIGTERM_ETCD, - recoverMember: recover_SIGTERM_ETCD, - } - return &failureDelay{ - Failure: f, - delayDuration: clus.GetFailureDelayDuration(), - } -} - -func new_FailureCase_SIGTERM_ALL(clus *Cluster) Failure { - f := &failureAll{ - failureCase: rpcpb.FailureCase_SIGTERM_ALL, - injectMember: inject_SIGTERM_ETCD, - recoverMember: recover_SIGTERM_ETCD, - } - return &failureDelay{ - Failure: f, - delayDuration: clus.GetFailureDelayDuration(), - } -} diff --git a/functional/tester/stress.go b/functional/tester/stresser.go similarity index 87% rename from functional/tester/stress.go rename to functional/tester/stresser.go index 7671853b3f9..b74b84b158e 100644 --- a/functional/tester/stress.go +++ b/functional/tester/stresser.go @@ -33,14 +33,12 @@ type Stresser interface { Close() map[string]int // ModifiedKeys reports the number of keys created and deleted by stresser ModifiedKeys() int64 - // Checker returns an invariant checker for after the stresser is canceled. - Checker() Checker } // newStresser creates stresser from a comma separated list of stresser types. -func newStresser(clus *Cluster, m *rpcpb.Member) Stresser { - stressers := make([]Stresser, len(clus.Tester.StressTypes)) - for i, stype := range clus.Tester.StressTypes { +func newStresser(clus *Cluster, m *rpcpb.Member) (stressers []Stresser) { + stressers = make([]Stresser, len(clus.Tester.Stressers)) + for i, stype := range clus.Tester.Stressers { clus.lg.Info( "creating stresser", zap.String("type", stype), @@ -52,7 +50,7 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser { // TODO: Too intensive stressing clients can panic etcd member with // 'out of memory' error. Put rate limits in server side. 
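
The TODO above is about moving rate limits server-side; until then the tester throttles on the client with a single shared `golang.org/x/time/rate` limiter. The runner stressers further down are external processes whose requests never pass through that limiter, which is why `newRunnerStresser` carves the runner's request rate out of the shared budget with `rl.SetLimit(rl.Limit() - rate.Limit(reqRate))`. A minimal sketch of that carve-out (the 5000 QPS figure is an assumption for illustration):

```go
package main

import (
	"fmt"

	"golang.org/x/time/rate"
)

func main() {
	// One limiter shared by all in-process stressers.
	shared := rate.NewLimiter(rate.Limit(5000), 1)

	// A runner stresser is an external process; its traffic cannot be
	// observed by the limiter, so its request rate is subtracted up front.
	reqRate := 100
	shared.SetLimit(shared.Limit() - rate.Limit(reqRate))

	fmt.Println(shared.Limit()) // 4900 QPS left for in-process stressers
}
```

The `keyStresser` construction the TODO refers to continues below.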
stressers[i] = &keyStresser{ - stype: rpcpb.StressType_KV, + stype: rpcpb.Stresser_KV, lg: clus.lg, m: m, keySize: int(clus.Tester.StressKeySize), @@ -66,7 +64,7 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser { case "LEASE": stressers[i] = &leaseStresser{ - stype: rpcpb.StressType_LEASE, + stype: rpcpb.Stresser_LEASE, lg: clus.lg, m: m, numLeases: 10, // TODO: configurable @@ -86,7 +84,8 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser { "--req-rate", fmt.Sprintf("%v", reqRate), } stressers[i] = newRunnerStresser( - rpcpb.StressType_ELECTION_RUNNER, + rpcpb.Stresser_ELECTION_RUNNER, + m.EtcdClientEndpoint, clus.lg, clus.Tester.RunnerExecPath, args, @@ -107,7 +106,8 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser { "--req-rate", fmt.Sprintf("%v", reqRate), } stressers[i] = newRunnerStresser( - rpcpb.StressType_WATCH_RUNNER, + rpcpb.Stresser_WATCH_RUNNER, + m.EtcdClientEndpoint, clus.lg, clus.Tester.RunnerExecPath, args, @@ -126,7 +126,8 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser { "--req-rate", fmt.Sprintf("%v", reqRate), } stressers[i] = newRunnerStresser( - rpcpb.StressType_LOCK_RACER_RUNNER, + rpcpb.Stresser_LOCK_RACER_RUNNER, + m.EtcdClientEndpoint, clus.lg, clus.Tester.RunnerExecPath, args, @@ -141,7 +142,8 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser { "--endpoints", m.EtcdClientEndpoint, } stressers[i] = newRunnerStresser( - rpcpb.StressType_LEASE_RUNNER, + rpcpb.Stresser_LEASE_RUNNER, + m.EtcdClientEndpoint, clus.lg, clus.Tester.RunnerExecPath, args, @@ -150,5 +152,5 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser { ) } } - return &compositeStresser{stressers} + return stressers } diff --git a/functional/tester/stress_composite.go b/functional/tester/stresser_composite.go similarity index 87% rename from functional/tester/stress_composite.go rename to functional/tester/stresser_composite.go index c19f764ffcd..6492458a2ee 100644 --- a/functional/tester/stress_composite.go +++ b/functional/tester/stresser_composite.go @@ -74,16 +74,3 @@ func (cs *compositeStresser) ModifiedKeys() (modifiedKey int64) { } return modifiedKey } - -func (cs *compositeStresser) Checker() Checker { - var chks []Checker - for _, s := range cs.stressers { - if chk := s.Checker(); chk != nil { - chks = append(chks, chk) - } - } - if len(chks) == 0 { - return nil - } - return newCompositeChecker(chks) -} diff --git a/functional/tester/stress_key.go b/functional/tester/stresser_key.go similarity index 99% rename from functional/tester/stress_key.go rename to functional/tester/stresser_key.go index 509748b8a88..2fc1bf2b0ce 100644 --- a/functional/tester/stress_key.go +++ b/functional/tester/stresser_key.go @@ -35,7 +35,7 @@ import ( ) type keyStresser struct { - stype rpcpb.StressType + stype rpcpb.Stresser lg *zap.Logger m *rpcpb.Member @@ -204,8 +204,6 @@ func (s *keyStresser) ModifiedKeys() int64 { return atomic.LoadInt64(&s.atomicModifiedKeys) } -func (s *keyStresser) Checker() Checker { return nil } - type stressFunc func(ctx context.Context) (err error, modifiedKeys int64) type stressEntry struct { diff --git a/functional/tester/stress_lease.go b/functional/tester/stresser_lease.go similarity index 99% rename from functional/tester/stress_lease.go rename to functional/tester/stresser_lease.go index c3797f47f98..8510a076536 100644 --- a/functional/tester/stress_lease.go +++ b/functional/tester/stresser_lease.go @@ -38,7 +38,7 @@ const ( ) type leaseStresser struct { - stype rpcpb.StressType + stype rpcpb.Stresser lg 
*zap.Logger m *rpcpb.Member @@ -485,7 +485,3 @@ func (ls *leaseStresser) Close() map[string]int { func (ls *leaseStresser) ModifiedKeys() int64 { return atomic.LoadInt64(&ls.atomicModifiedKey) } - -func (ls *leaseStresser) Checker() Checker { - return &leaseChecker{lg: ls.lg, m: ls.m, ls: ls} -} diff --git a/functional/tester/stress_runner.go b/functional/tester/stresser_runner.go similarity index 85% rename from functional/tester/stress_runner.go rename to functional/tester/stresser_runner.go index d52b94cb873..18487f402b7 100644 --- a/functional/tester/stress_runner.go +++ b/functional/tester/stresser_runner.go @@ -27,8 +27,9 @@ import ( ) type runnerStresser struct { - stype rpcpb.StressType - lg *zap.Logger + stype rpcpb.Stresser + etcdClientEndpoint string + lg *zap.Logger cmd *exec.Cmd cmdStr string @@ -41,7 +42,8 @@ type runnerStresser struct { } func newRunnerStresser( - stype rpcpb.StressType, + stype rpcpb.Stresser, + ep string, lg *zap.Logger, cmdStr string, args []string, @@ -50,13 +52,14 @@ func newRunnerStresser( ) *runnerStresser { rl.SetLimit(rl.Limit() - rate.Limit(reqRate)) return &runnerStresser{ - stype: stype, - cmdStr: cmdStr, - args: args, - rl: rl, - reqRate: reqRate, - errc: make(chan error, 1), - donec: make(chan struct{}), + stype: stype, + etcdClientEndpoint: ep, + cmdStr: cmdStr, + args: args, + rl: rl, + reqRate: reqRate, + errc: make(chan error, 1), + donec: make(chan struct{}), } } @@ -115,7 +118,3 @@ func (rs *runnerStresser) Close() map[string]int { func (rs *runnerStresser) ModifiedKeys() int64 { return 1 } - -func (rs *runnerStresser) Checker() Checker { - return &runnerChecker{rs.errc} -} diff --git a/test b/test index ab31ad1ca7b..1a9f843b68d 100755 --- a/test +++ b/test @@ -180,7 +180,7 @@ function integration_extra { function functional_pass { # Clean up any data and logs from previous runs - rm -rf /tmp/etcd-agent-data-* /tmp/etcd-agent-data-*.backup + rm -rf /tmp/etcd-functional-* /tmp/etcd-functional-*.backup for a in 1 2 3; do ./bin/etcd-agent --network tcp --address 127.0.0.1:${a}9027 &
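
Taken together, the hunks above delete every `Checker()` implementation (`compositeStresser`, `keyStresser`, `leaseStresser`, `runnerStresser`) along with the interface method, so invariant checking is no longer coupled to stressing, and `newStresser` now returns the plain `[]Stresser` slice, moving composition to the caller. After this change the interface reduces to the shape below; `Stress` and `Pause` are not visible in these hunks and are reconstructed from their call sites, so treat those two signatures as assumptions:

```go
package tester

// Stresser, as it stands after this diff: the Checker method is gone, and
// checkers are wired up elsewhere in the tester.
type Stresser interface {
	// Stress starts stressing the etcd cluster. (signature assumed)
	Stress() error
	// Pause stops the stresser from sending requests. (signature assumed)
	Pause()
	// Close releases the stresser's resources.
	Close() map[string]int
	// ModifiedKeys reports the number of keys created and deleted by stresser
	ModifiedKeys() int64
}
```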