From 8b2aa7ca2f16d3867f9fa5b82e47877302cc2547 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Fri, 25 Oct 2024 03:32:37 +0000 Subject: [PATCH 1/9] Bump etcd to v3.5.16-k3s1 Signed-off-by: Brad Davidson (cherry picked from commit e34fe0343f420e004340155c6efc2742907d2dfb) Signed-off-by: Brad Davidson --- go.mod | 16 ++++++++-------- go.sum | 32 ++++++++++++++++---------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/go.mod b/go.mod index 56f7948827d4..9f26943e5367 100644 --- a/go.mod +++ b/go.mod @@ -21,14 +21,14 @@ replace ( github.com/prometheus/common => github.com/prometheus/common v0.55.0 github.com/spegel-org/spegel => github.com/k3s-io/spegel v0.0.23-0.20240516234953-f3d2c4072314 github.com/ugorji/go => github.com/ugorji/go v1.2.11 - go.etcd.io/etcd/api/v3 => github.com/k3s-io/etcd/api/v3 v3.5.13-k3s1 - go.etcd.io/etcd/client/pkg/v3 => github.com/k3s-io/etcd/client/pkg/v3 v3.5.13-k3s1 - go.etcd.io/etcd/client/v2 => github.com/k3s-io/etcd/client/v2 v2.305.13-k3s1 - go.etcd.io/etcd/client/v3 => github.com/k3s-io/etcd/client/v3 v3.5.13-k3s1 - go.etcd.io/etcd/etcdutl/v3 => github.com/k3s-io/etcd/etcdutl/v3 v3.5.13-k3s1 - go.etcd.io/etcd/pkg/v3 => github.com/k3s-io/etcd/pkg/v3 v3.5.13-k3s1 - go.etcd.io/etcd/raft/v3 => github.com/k3s-io/etcd/raft/v3 v3.5.13-k3s1 - go.etcd.io/etcd/server/v3 => github.com/k3s-io/etcd/server/v3 v3.5.13-k3s1 + go.etcd.io/etcd/api/v3 => github.com/k3s-io/etcd/api/v3 v3.5.16-k3s1 + go.etcd.io/etcd/client/pkg/v3 => github.com/k3s-io/etcd/client/pkg/v3 v3.5.16-k3s1 + go.etcd.io/etcd/client/v2 => github.com/k3s-io/etcd/client/v2 v2.305.16-k3s1 + go.etcd.io/etcd/client/v3 => github.com/k3s-io/etcd/client/v3 v3.5.16-k3s1 + go.etcd.io/etcd/etcdutl/v3 => github.com/k3s-io/etcd/etcdutl/v3 v3.5.16-k3s1 + go.etcd.io/etcd/pkg/v3 => github.com/k3s-io/etcd/pkg/v3 v3.5.16-k3s1 + go.etcd.io/etcd/raft/v3 => github.com/k3s-io/etcd/raft/v3 v3.5.16-k3s1 + go.etcd.io/etcd/server/v3 => github.com/k3s-io/etcd/server/v3 v3.5.16-k3s1 go.opentelemetry.io/contrib/instrumentation/github.com/emicklei/go-restful/otelrestful => go.opentelemetry.io/contrib/instrumentation/github.com/emicklei/go-restful/otelrestful v0.44.0 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc => go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.0 golang.org/x/crypto => golang.org/x/crypto v0.24.0 diff --git a/go.sum b/go.sum index 2d13ea3ba0da..084e4a04d04e 100644 --- a/go.sum +++ b/go.sum @@ -972,22 +972,22 @@ github.com/k3s-io/cri-dockerd v0.3.15-k3s1.31-3 h1:TH3zSbIM9zSZMeWKcWjQqeja3FsmJ github.com/k3s-io/cri-dockerd v0.3.15-k3s1.31-3/go.mod h1:ny6wyM7fqfew5FABQ+MtKaU07rhsHhlXr/jtFsA2m8Y= github.com/k3s-io/cri-tools v1.31.0-k3s2 h1:nekOdJe5Hecm+C5eswg688uXTI0enUZOJYadmyU9pYw= github.com/k3s-io/cri-tools v1.31.0-k3s2/go.mod h1:PvPf/fN5FiNdK1v43jCydRNRw6631qGTSEOhv/OsjYU= -github.com/k3s-io/etcd/api/v3 v3.5.13-k3s1 h1:aq6fxlEKdwCooLE3HOR6227U51DEvOw3DEbriJxD2QM= -github.com/k3s-io/etcd/api/v3 v3.5.13-k3s1/go.mod h1:gBqlqkcMMZMVTMm4NDZloEVJzxQOQIls8splbqBDa0c= -github.com/k3s-io/etcd/client/pkg/v3 v3.5.13-k3s1 h1:t2I25UtBvohVAhlyXpYjd/Lznm+ybxNhvs3cnEGsF4Y= -github.com/k3s-io/etcd/client/pkg/v3 v3.5.13-k3s1/go.mod h1:XxHT4u1qU12E2+po+UVPrEeL94Um6zL58ppuJWXSAB8= -github.com/k3s-io/etcd/client/v2 v2.305.13-k3s1 h1:lvIdlAI6xRIHSUJC43sJx9lmxehq2quGb+8z5TJldGg= -github.com/k3s-io/etcd/client/v2 v2.305.13-k3s1/go.mod h1:iQnL7fepbiomdXMb3om1rHq96htNNGv2sJkEcZGDRRg= -github.com/k3s-io/etcd/client/v3 v3.5.13-k3s1 h1:/D6KAEGVzwivnjxZ5CzVIykVloLoKB/TBeKw2tKKVQ0= -github.com/k3s-io/etcd/client/v3 v3.5.13-k3s1/go.mod h1:cqiAeY8b5DEEcpxvgWKsbLIWNM/8Wy2xJSDMtioMcoI= -github.com/k3s-io/etcd/etcdutl/v3 v3.5.13-k3s1 h1:fIt+PVHCeINM5fl9OfMI+o9BJKf951pRiVcCytFW97c= -github.com/k3s-io/etcd/etcdutl/v3 v3.5.13-k3s1/go.mod h1:2vhvTIQobP+Cb04qzlcbKGvX6J5oq/N1kquk1yCDIQY= -github.com/k3s-io/etcd/pkg/v3 v3.5.13-k3s1 h1:uLU/SnBuhtSkdBk830x0pseHSsQQvh99C3deG6nc9d0= -github.com/k3s-io/etcd/pkg/v3 v3.5.13-k3s1/go.mod h1:N+4PLrp7agI/Viy+dUYpX7iRtSPvKq+w8Y14d1vX+m0= -github.com/k3s-io/etcd/raft/v3 v3.5.13-k3s1 h1:yexUwAPPdmYfIMWOj6sSyJ2nEe8QOrFzNuvYGRAsm5E= -github.com/k3s-io/etcd/raft/v3 v3.5.13-k3s1/go.mod h1:uUFibGLn2Ksm2URMxN1fICGhk8Wu96EfDQyuLhAcAmw= -github.com/k3s-io/etcd/server/v3 v3.5.13-k3s1 h1:Pqcxkg7V60c26ZpHoekP9QoUdLuduxFn827A/5CIwm4= -github.com/k3s-io/etcd/server/v3 v3.5.13-k3s1/go.mod h1:K/8nbsGupHqmr5MkgaZpLlH1QdX1pcNQLAkODy44XcQ= +github.com/k3s-io/etcd/api/v3 v3.5.16-k3s1 h1:RNExemPFr4S+VqJ2jXVf0Y9iXaps0pTeklSN735z0Mw= +github.com/k3s-io/etcd/api/v3 v3.5.16-k3s1/go.mod h1:1P4SlIP/VwkDmGo3OlOD7faPeP8KDIFhqvciH5EfN28= +github.com/k3s-io/etcd/client/pkg/v3 v3.5.16-k3s1 h1:wEmVFnZ+h3v5ECRmX6jf4SeIykDQei+DRnBczM23YQA= +github.com/k3s-io/etcd/client/pkg/v3 v3.5.16-k3s1/go.mod h1:V8acl8pcEK0Y2g19YlOV9m9ssUe6MgiDSobSoaBAM0E= +github.com/k3s-io/etcd/client/v2 v2.305.16-k3s1 h1:f7qWAqVhxrMdBt0coehUYfP0Cix7clL2ko/XCqvWols= +github.com/k3s-io/etcd/client/v2 v2.305.16-k3s1/go.mod h1:h9YxWCzcdvZENbfzBTFCnoNumr2ax3F19sKMqHFmXHE= +github.com/k3s-io/etcd/client/v3 v3.5.16-k3s1 h1:ON2Cd0Fx+qQ53GS6qK6Mr9fh7MFCShZfw+rsrLZ6j5M= +github.com/k3s-io/etcd/client/v3 v3.5.16-k3s1/go.mod h1:X+rExSGkyqxvu276cr2OwPLBaeqFu1cIl4vmRjAD/50= +github.com/k3s-io/etcd/etcdutl/v3 v3.5.16-k3s1 h1:qUuDUhfhOZ3D6/XsW04jV514+DhV7R669+/+3n9i7VY= +github.com/k3s-io/etcd/etcdutl/v3 v3.5.16-k3s1/go.mod h1:X22QojXcHZNS3TPAitpcYW7rwTvnmchFwAKkSSz0Ncw= +github.com/k3s-io/etcd/pkg/v3 v3.5.16-k3s1 h1:4nDx3la68jehJfqWPs1Yx1clPW7938pKQXrVxp2OgyA= +github.com/k3s-io/etcd/pkg/v3 v3.5.16-k3s1/go.mod h1:+lutCZHG5MBBFI/U4eYT5yL7sJfnexsoM20Y0t2uNuY= +github.com/k3s-io/etcd/raft/v3 v3.5.16-k3s1 h1:nD/YzAeIbEcSkXAQFRwAs/2zc2vXAkKmQnDKf6UDCxY= +github.com/k3s-io/etcd/raft/v3 v3.5.16-k3s1/go.mod h1:P4UP14AxofMJ/54boWilabqqWoW9eLodl6I5GdGzazI= +github.com/k3s-io/etcd/server/v3 v3.5.16-k3s1 h1:9c0DChFw6WRz6r+eCuVLBltZcRwT6h1l79biTPuAGR0= +github.com/k3s-io/etcd/server/v3 v3.5.16-k3s1/go.mod h1:ynhyZZpdDp1Gq49jkUg5mfkDWZwXnn3eIqCqtJnrD/s= github.com/k3s-io/helm-controller v0.16.5 h1:SsUHfksQXNwePkswv4a970EGD2h0Exsf6t3IdXhpXRo= github.com/k3s-io/helm-controller v0.16.5/go.mod h1:AcSxEhOIUgeVvBTnJOAwcezBZXtYew/RhKwO5xp3RlM= github.com/k3s-io/kine v0.13.2 h1:l++g2KY/3UaPJiGpgYuGoqaaYKeMpVj9fP/yfnSxHxo= From 9e6e0a61d7a6b0d5e9cb00b43622dc0899628c89 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Thu, 24 Oct 2024 00:16:34 +0000 Subject: [PATCH 2/9] Fix issues with defragment and alarm clear on etcd startup * Use clientv3.NewCtxClient instead of New to avoid automatic retry of all RPCs * Only timeout status requests; allow defrag and alarm clear requests to run to completion. * Only clear alarms on the local cluster member, not ALL cluster members Signed-off-by: Brad Davidson (cherry picked from commit 095e34d816e672cfc579a93e7dacab5bfc0144b0) Signed-off-by: Brad Davidson --- go.mod | 4 +- pkg/etcd/etcd.go | 179 +++++++++++++++++++++++++++---------------- pkg/etcd/resolver.go | 80 +++++++++++++++++++ pkg/etcd/snapshot.go | 4 +- 4 files changed, 195 insertions(+), 72 deletions(-) create mode 100644 pkg/etcd/resolver.go diff --git a/go.mod b/go.mod index 9f26943e5367..32134862c961 100644 --- a/go.mod +++ b/go.mod @@ -135,9 +135,11 @@ require ( github.com/vishvananda/netlink v1.2.1-beta.2 github.com/yl2chen/cidranger v1.0.2 go.etcd.io/etcd/api/v3 v3.5.16 + go.etcd.io/etcd/client/pkg/v3 v3.5.16 go.etcd.io/etcd/client/v3 v3.5.16 go.etcd.io/etcd/etcdutl/v3 v3.5.13 go.etcd.io/etcd/server/v3 v3.5.16 + go.uber.org/zap v1.27.0 golang.org/x/crypto v0.27.0 golang.org/x/net v0.28.0 golang.org/x/sync v0.8.0 @@ -423,7 +425,6 @@ require ( github.com/xlab/treeprint v1.2.0 // indirect github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect go.etcd.io/bbolt v1.3.11 // indirect - go.etcd.io/etcd/client/pkg/v3 v3.5.16 // indirect go.etcd.io/etcd/client/v2 v2.305.16 // indirect go.etcd.io/etcd/pkg/v3 v3.5.16 // indirect go.etcd.io/etcd/raft/v3 v3.5.16 // indirect @@ -445,7 +446,6 @@ require ( go.uber.org/fx v1.20.1 // indirect go.uber.org/mock v0.4.0 // indirect go.uber.org/multierr v1.11.0 // indirect - go.uber.org/zap v1.27.0 // indirect golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect golang.org/x/mod v0.20.0 // indirect golang.org/x/oauth2 v0.21.0 // indirect diff --git a/pkg/etcd/etcd.go b/pkg/etcd/etcd.go index 34eafdd7c070..dfcce5433658 100644 --- a/pkg/etcd/etcd.go +++ b/pkg/etcd/etcd.go @@ -41,8 +41,15 @@ import ( "github.com/sirupsen/logrus" "go.etcd.io/etcd/api/v3/etcdserverpb" "go.etcd.io/etcd/api/v3/v3rpc/rpctypes" + "go.etcd.io/etcd/client/pkg/v3/logutil" clientv3 "go.etcd.io/etcd/client/v3" + "go.etcd.io/etcd/client/v3/credentials" snapshotv3 "go.etcd.io/etcd/etcdutl/v3/snapshot" + "go.etcd.io/etcd/server/v3/etcdserver" + "go.uber.org/zap/zapcore" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/keepalive" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" @@ -55,7 +62,7 @@ import ( ) const ( - testTimeout = time.Second * 30 + statusTimeout = time.Second * 30 manageTickerTime = time.Second * 15 learnerMaxStallTime = time.Minute * 5 memberRemovalTimeout = time.Minute * 1 @@ -206,35 +213,40 @@ func (e *ETCD) Test(ctx context.Context) error { return errors.New("etcd datastore is not started") } - ctx, cancel := context.WithTimeout(ctx, testTimeout) - defer cancel() - - endpoints := getEndpoints(e.config) - status, err := e.client.Status(ctx, endpoints[0]) + status, err := e.status(ctx) if err != nil { - return err + return errors.Wrap(err, "failed to get etcd status") + } else if status.IsLearner { + return errors.New("this server has not yet been promoted from learner to voting member") + } else if status.Leader == 0 { + return etcdserver.ErrNoLeader } - if status.IsLearner { - return errors.New("this server has not yet been promoted from learner to voting member") + logrus.Infof("Connected to etcd v%s - datastore using %d of %d bytes", status.Version, status.DbSizeInUse, status.DbSize) + if len(status.Errors) > 0 { + logrus.Warnf("Errors present on etcd cluster: %s", strings.Join(status.Errors, ",")) } + // defrag this node to reclaim freed space from compacted revisions if err := e.defragment(ctx); err != nil { return errors.Wrap(err, "failed to defragment etcd database") } - if err := e.clearAlarms(ctx); err != nil { - return errors.Wrap(err, "failed to report and disarm etcd alarms") + // clear alarms on this node + if err := e.clearAlarms(ctx, status.Header.MemberId); err != nil { + return errors.Wrap(err, "failed to disarm etcd alarms") } - // refresh status to see if any errors remain after clearing alarms - status, err = e.client.Status(ctx, endpoints[0]) + // refresh status - note that errors may remain on other nodes, but this + // should not prevent us from continuing with startup. + status, err = e.status(ctx) if err != nil { - return err + return errors.Wrap(err, "failed to get etcd status") } + logrus.Infof("Datastore using %d of %d bytes after defragment", status.DbSizeInUse, status.DbSize) if len(status.Errors) > 0 { - return fmt.Errorf("etcd cluster errors: %s", strings.Join(status.Errors, ", ")) + logrus.Warnf("Errors present on etcd cluster after defragment: %s", strings.Join(status.Errors, ",")) } members, err := e.client.MemberList(ctx) @@ -242,6 +254,7 @@ func (e *ETCD) Test(ctx context.Context) error { return err } + // Ensure that there is a cluster member with our peerURL and name var memberNameUrls []string for _, member := range members.Members { for _, peerURL := range member.PeerURLs { @@ -253,6 +266,8 @@ func (e *ETCD) Test(ctx context.Context) error { memberNameUrls = append(memberNameUrls, member.Name+"="+member.PeerURLs[0]) } } + + // no matching PeerURL on any Member, return an error that indicates what was expected vs what we found. return &membershipError{members: memberNameUrls, self: e.name + "=" + e.peerURL()} } @@ -523,7 +538,7 @@ func (e *ETCD) startClient(ctx context.Context) error { e.config.Datastore.BackendTLSConfig.CertFile = e.config.Runtime.ClientETCDCert e.config.Datastore.BackendTLSConfig.KeyFile = e.config.Runtime.ClientETCDKey - client, err := getClient(ctx, e.config, endpoints...) + client, conn, err := getClient(ctx, e.config, endpoints...) if err != nil { return err } @@ -531,9 +546,8 @@ func (e *ETCD) startClient(ctx context.Context) error { go func() { <-ctx.Done() - client := e.client e.client = nil - client.Close() + conn.Close() }() return nil @@ -554,11 +568,11 @@ func (e *ETCD) join(ctx context.Context, clientAccessInfo *clientaccess.Info) er return err } - client, err := getClient(clientCtx, e.config, clientURLs...) + client, conn, err := getClient(clientCtx, e.config, clientURLs...) if err != nil { return err } - defer client.Close() + defer conn.Close() for _, member := range memberList.Members { for _, peer := range member.PeerURLs { @@ -725,13 +739,53 @@ func (e *ETCD) infoHandler() http.Handler { // If the runtime config does not list any endpoints, the default endpoint is used. // The returned client should be closed when no longer needed, in order to avoid leaking GRPC // client goroutines. -func getClient(ctx context.Context, control *config.Control, endpoints ...string) (*clientv3.Client, error) { +func getClient(ctx context.Context, control *config.Control, endpoints ...string) (*clientv3.Client, *grpc.ClientConn, error) { + logger, err := logutil.CreateDefaultZapLogger(zapcore.DebugLevel) + if err != nil { + return nil, nil, err + } + cfg, err := getClientConfig(ctx, control, endpoints...) if err != nil { - return nil, err + return nil, nil, err + } + + // Set up dialer and resolver options. + // This is normally handled by clientv3.New() but that wraps all the GRPC + // service with retry handlers and uses deprecated grpc.DialContext() which + // tries to establish a connection even when one isn't wanted. + if cfg.DialKeepAliveTime > 0 { + params := keepalive.ClientParameters{ + Time: cfg.DialKeepAliveTime, + Timeout: cfg.DialKeepAliveTimeout, + PermitWithoutStream: cfg.PermitWithoutStream, + } + cfg.DialOptions = append(cfg.DialOptions, grpc.WithKeepaliveParams(params)) + } + + if cfg.TLS != nil { + creds := credentials.NewBundle(credentials.Config{TLSConfig: cfg.TLS}).TransportCredentials() + cfg.DialOptions = append(cfg.DialOptions, grpc.WithTransportCredentials(creds)) + } else { + cfg.DialOptions = append(cfg.DialOptions, grpc.WithTransportCredentials(insecure.NewCredentials())) } - return clientv3.New(*cfg) + cfg.DialOptions = append(cfg.DialOptions, grpc.WithResolvers(NewSimpleResolver(cfg.Endpoints[0]))) + + target := fmt.Sprintf("%s://%p/%s", scheme, cfg, authority(cfg.Endpoints[0])) + conn, err := grpc.NewClient(target, cfg.DialOptions...) + if err != nil { + return nil, nil, err + } + + // Create a new client and wire up the GRPC service interfaces. + // Ref: https://github.com/etcd-io/etcd/blob/v3.5.16/client/v3/client.go#L87 + client := clientv3.NewCtxClient(ctx, clientv3.WithZapLogger(logger.Named(version.Program+"-etcd-client"))) + client.Cluster = clientv3.NewClusterFromClusterClient(etcdserverpb.NewClusterClient(conn), client) + client.KV = clientv3.NewKVFromKVClient(etcdserverpb.NewKVClient(conn), client) + client.Maintenance = clientv3.NewMaintenanceFromMaintenanceClient(etcdserverpb.NewMaintenanceClient(conn), client) + + return client, conn, nil } // getClientConfig generates an etcd client config connected to the specified endpoints. @@ -851,11 +905,11 @@ func (e *ETCD) migrateFromSQLite(ctx context.Context) error { } defer sqliteClient.Close() - etcdClient, err := getClient(ctx, e.config) + etcdClient, conn, err := getClient(ctx, e.config) if err != nil { return err } - defer etcdClient.Close() + defer conn.Close() values, err := sqliteClient.List(ctx, "/registry/", 0) if err != nil { @@ -984,7 +1038,7 @@ func (e *ETCD) StartEmbeddedTemporary(ctx context.Context) error { return errors.New("etcd datastore already started") } - client, err := getClient(ctx, e.config) + client, conn, err := getClient(ctx, e.config) if err != nil { return err } @@ -992,9 +1046,8 @@ func (e *ETCD) StartEmbeddedTemporary(ctx context.Context) error { go func() { <-ctx.Done() - client := e.client e.client = nil - client.Close() + conn.Close() }() if err := cp.Copy(etcdDataDir, tmpDataDir, cp.Options{PreserveOwner: true}); err != nil { @@ -1251,8 +1304,6 @@ func (e *ETCD) trackLearnerProgress(ctx context.Context, progress *learnerProgre } func (e *ETCD) getETCDStatus(ctx context.Context, url string) (*clientv3.StatusResponse, error) { - ctx, cancel := context.WithTimeout(ctx, defaultDialTimeout) - defer cancel() resp, err := e.client.Status(ctx, url) if err != nil { return resp, errors.Wrap(err, "failed to check etcd member status") @@ -1363,12 +1414,10 @@ func (e *ETCD) setLearnerProgress(ctx context.Context, status *learnerProgress) return err } -// clearAlarms checks for any alarms on the local etcd member. If found, they are -// reported and the alarm state is cleared. -func (e *ETCD) clearAlarms(ctx context.Context) error { - ctx, cancel := context.WithTimeout(ctx, testTimeout) - defer cancel() - +// clearAlarms checks for any NOSPACE alarms on the local etcd member. +// If found, they are reported and the alarm state is cleared. +// Other alarm types are not handled. +func (e *ETCD) clearAlarms(ctx context.Context, memberID uint64) error { if e.client == nil { return errors.New("etcd client was nil") } @@ -1379,22 +1428,37 @@ func (e *ETCD) clearAlarms(ctx context.Context) error { } for _, alarm := range alarmList.Alarms { - logrus.Warnf("Alarm on etcd member %d: %s", alarm.MemberID, alarm.Alarm) - } - - if len(alarmList.Alarms) > 0 { - if _, err := e.client.AlarmDisarm(ctx, &clientv3.AlarmMember{}); err != nil { - return fmt.Errorf("etcd alarm disarm failed: %v", err) + if alarm.MemberID != memberID { + // ignore alarms on other cluster members, they should manage their own problems + continue + } + if alarm.Alarm == etcdserverpb.AlarmType_NOSPACE { + if _, err := e.client.AlarmDisarm(ctx, &clientv3.AlarmMember{MemberID: alarm.MemberID, Alarm: alarm.Alarm}); err != nil { + return fmt.Errorf("%s disarm failed: %v", alarm.Alarm, err) + } + logrus.Infof("%s disarmed successfully", alarm.Alarm) + } else { + return fmt.Errorf("%s alarm must be disarmed manually", alarm.Alarm) } - logrus.Infof("Alarms disarmed on etcd server") } return nil } -func (e *ETCD) defragment(ctx context.Context) error { - ctx, cancel := context.WithTimeout(ctx, testTimeout) +// status returns status using the first etcd endpoint. +func (e *ETCD) status(ctx context.Context) (*clientv3.StatusResponse, error) { + if e.client == nil { + return nil, errors.New("etcd client was nil") + } + + ctx, cancel := context.WithTimeout(ctx, statusTimeout) defer cancel() + endpoints := getEndpoints(e.config) + return e.client.Status(ctx, endpoints[0]) +} + +// defragment defragments the etcd datastore using the first etcd endpoint +func (e *ETCD) defragment(ctx context.Context) error { if e.client == nil { return errors.New("etcd client was nil") } @@ -1550,11 +1614,11 @@ func backupDirWithRetention(dir string, maxBackupRetention int) (string, error) // GetAPIServerURLsFromETCD will try to fetch the version.Program/apiaddresses key from etcd // and unmarshal it to a list of apiserver endpoints. func GetAPIServerURLsFromETCD(ctx context.Context, cfg *config.Control) ([]string, error) { - cl, err := getClient(ctx, cfg) + cl, conn, err := getClient(ctx, cfg) if err != nil { return nil, err } - defer cl.Close() + defer conn.Close() etcdResp, err := cl.KV.Get(ctx, AddressKey) if err != nil { @@ -1576,9 +1640,6 @@ func GetAPIServerURLsFromETCD(ctx context.Context, cfg *config.Control) ([]strin // GetMembersClientURLs will list through the member lists in etcd and return // back a combined list of client urls for each member in the cluster func (e *ETCD) GetMembersClientURLs(ctx context.Context) ([]string, error) { - ctx, cancel := context.WithTimeout(ctx, testTimeout) - defer cancel() - members, err := e.client.MemberList(ctx) if err != nil { return nil, err @@ -1593,24 +1654,6 @@ func (e *ETCD) GetMembersClientURLs(ctx context.Context) ([]string, error) { return clientURLs, nil } -// GetMembersNames will list through the member lists in etcd and return -// back a combined list of member names -func (e *ETCD) GetMembersNames(ctx context.Context) ([]string, error) { - ctx, cancel := context.WithTimeout(ctx, testTimeout) - defer cancel() - - members, err := e.client.MemberList(ctx) - if err != nil { - return nil, err - } - - var memberNames []string - for _, member := range members.Members { - memberNames = append(memberNames, member.Name) - } - return memberNames, nil -} - // RemoveSelf will remove the member if it exists in the cluster. This should // only be called on a node that may have previously run etcd, but will not // currently run etcd, to ensure that it is not a member of the cluster. diff --git a/pkg/etcd/resolver.go b/pkg/etcd/resolver.go new file mode 100644 index 000000000000..b95242cbfa91 --- /dev/null +++ b/pkg/etcd/resolver.go @@ -0,0 +1,80 @@ +package etcd + +import ( + "net/url" + "path" + "strings" + + "google.golang.org/grpc/resolver" + "google.golang.org/grpc/resolver/manual" +) + +const scheme = "etcd-endpoint" + +type EtcdSimpleResolver struct { + *manual.Resolver + endpoint string +} + +// Cribbed from https://github.com/etcd-io/etcd/blob/v3.5.16/client/v3/internal/resolver/resolver.go +// but only supports a single fixed endpoint. We use this instead of the internal etcd client resolver +// because the agent loadbalancer handles failover and we don't want etcd or grpc's special behavior. +func NewSimpleResolver(endpoint string) *EtcdSimpleResolver { + r := manual.NewBuilderWithScheme(scheme) + return &EtcdSimpleResolver{Resolver: r, endpoint: endpoint} +} + +func (r *EtcdSimpleResolver) Build(target resolver.Target, cc resolver.ClientConn, opts resolver.BuildOptions) (resolver.Resolver, error) { + res, err := r.Resolver.Build(target, cc, opts) + if err != nil { + return nil, err + } + + if r.CC != nil { + addr, serverName := interpret(r.endpoint) + r.UpdateState(resolver.State{ + Addresses: []resolver.Address{{Addr: addr, ServerName: serverName}}, + }) + } + + return res, nil +} + +func interpret(ep string) (string, string) { + if strings.HasPrefix(ep, "unix:") || strings.HasPrefix(ep, "unixs:") { + if strings.HasPrefix(ep, "unix:///") || strings.HasPrefix(ep, "unixs:///") { + _, absolutePath, _ := strings.Cut(ep, "://") + return "unix://" + absolutePath, path.Base(absolutePath) + } + if strings.HasPrefix(ep, "unix://") || strings.HasPrefix(ep, "unixs://") { + _, localPath, _ := strings.Cut(ep, "://") + return "unix:" + localPath, path.Base(localPath) + } + _, localPath, _ := strings.Cut(ep, ":") + return "unix:" + localPath, path.Base(localPath) + } + if strings.Contains(ep, "://") { + url, err := url.Parse(ep) + if err != nil { + return ep, ep + } + if url.Scheme == "http" || url.Scheme == "https" { + return url.Host, url.Host + } + return ep, url.Host + } + return ep, ep +} + +func authority(ep string) string { + if _, authority, ok := strings.Cut(ep, "://"); ok { + return authority + } + if suff, ok := strings.CutPrefix(ep, "unix:"); ok { + return suff + } + if suff, ok := strings.CutPrefix(ep, "unixs:"); ok { + return suff + } + return ep +} diff --git a/pkg/etcd/snapshot.go b/pkg/etcd/snapshot.go index 8f844981f5d9..90919c2403af 100644 --- a/pkg/etcd/snapshot.go +++ b/pkg/etcd/snapshot.go @@ -27,7 +27,7 @@ import ( "github.com/pkg/errors" "github.com/robfig/cron/v3" "github.com/sirupsen/logrus" - snapshotv3 "go.etcd.io/etcd/etcdutl/v3/snapshot" + snapshotv3 "go.etcd.io/etcd/client/v3/snapshot" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -243,7 +243,7 @@ func (e *ETCD) Snapshot(ctx context.Context) (*managed.SnapshotResult, error) { var sf *snapshot.File - if err := snapshotv3.NewV3(e.client.GetLogger()).Save(ctx, *cfg, snapshotPath); err != nil { + if err := snapshotv3.Save(ctx, e.client.GetLogger(), *cfg, snapshotPath); err != nil { sf = &snapshot.File{ Name: snapshotName, Location: "", From bd8316b8f365e879df92ad0df0a796c5c391b9ac Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Thu, 24 Oct 2024 22:21:44 +0000 Subject: [PATCH 3/9] Add tests for ETCD.Test() Signed-off-by: Brad Davidson (cherry picked from commit a39e191906e0bee927196d3ebf22e32041e72995) Signed-off-by: Brad Davidson --- pkg/etcd/etcd_test.go | 523 ++++++++++++++++++++++++++++++++++++++++-- tests/unit.go | 4 + 2 files changed, 507 insertions(+), 20 deletions(-) diff --git a/pkg/etcd/etcd_test.go b/pkg/etcd/etcd_test.go index 5a519bdcffe4..f875a24ad1b7 100644 --- a/pkg/etcd/etcd_test.go +++ b/pkg/etcd/etcd_test.go @@ -6,6 +6,7 @@ import ( "net/http" "os" "path/filepath" + "sync" "testing" "time" @@ -15,11 +16,23 @@ import ( testutil "github.com/k3s-io/k3s/tests" "github.com/robfig/cron/v3" "github.com/sirupsen/logrus" + "go.etcd.io/etcd/api/v3/etcdserverpb" clientv3 "go.etcd.io/etcd/client/v3" "go.etcd.io/etcd/server/v3/etcdserver" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/health" + healthpb "google.golang.org/grpc/health/grpc_health_v1" + "google.golang.org/grpc/reflection" + "google.golang.org/grpc/status" utilnet "k8s.io/apimachinery/pkg/util/net" ) +func init() { + logrus.SetLevel(logrus.DebugLevel) +} + func mustGetAddress() string { ipAddr, err := utilnet.ChooseHostInterface() if err != nil { @@ -76,7 +89,7 @@ func Test_UnitETCD_IsInitialized(t *testing.T) { wantErr bool }{ { - name: "Directory exists", + name: "directory exists", args: args{ ctx: context.TODO(), config: generateTestConfig(), @@ -95,7 +108,7 @@ func Test_UnitETCD_IsInitialized(t *testing.T) { want: true, }, { - name: "Directory does not exist", + name: "directory does not exist", args: args{ ctx: context.TODO(), config: generateTestConfig(), @@ -117,9 +130,6 @@ func Test_UnitETCD_IsInitialized(t *testing.T) { }, } - // enable logging - logrus.SetLevel(logrus.DebugLevel) - for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { e := NewETCD() @@ -159,7 +169,7 @@ func Test_UnitETCD_Register(t *testing.T) { wantErr bool }{ { - name: "Call Register with standard config", + name: "standard config", args: args{ ctx: context.TODO(), config: generateTestConfig(), @@ -174,7 +184,7 @@ func Test_UnitETCD_Register(t *testing.T) { }, }, { - name: "Call Register with a tombstone file created", + name: "with a tombstone file created", args: args{ ctx: context.TODO(), config: generateTestConfig(), @@ -249,7 +259,7 @@ func Test_UnitETCD_Start(t *testing.T) { wantErr bool }{ { - name: "Start etcd without clientAccessInfo and without snapshots", + name: "nil clientAccessInfo and nil cron", fields: fields{ config: generateTestConfig(), address: mustGetAddress(), @@ -266,17 +276,18 @@ func Test_UnitETCD_Start(t *testing.T) { }, teardown: func(e *ETCD, ctxInfo *contextInfo) error { // RemoveSelf will fail with a specific error, but it still does cleanup for testing purposes - if err := e.RemoveSelf(ctxInfo.ctx); err != nil && err.Error() != etcdserver.ErrNotEnoughStartedMembers.Error() { - return err - } + err := e.RemoveSelf(ctxInfo.ctx) ctxInfo.cancel() - time.Sleep(10 * time.Second) + time.Sleep(5 * time.Second) testutil.CleanupDataDir(e.config) + if err != nil && err.Error() != etcdserver.ErrNotEnoughStartedMembers.Error() { + return err + } return nil }, }, { - name: "Start etcd without clientAccessInfo on", + name: "nil clientAccessInfo", fields: fields{ config: generateTestConfig(), address: mustGetAddress(), @@ -293,17 +304,18 @@ func Test_UnitETCD_Start(t *testing.T) { }, teardown: func(e *ETCD, ctxInfo *contextInfo) error { // RemoveSelf will fail with a specific error, but it still does cleanup for testing purposes - if err := e.RemoveSelf(ctxInfo.ctx); err != nil && err.Error() != etcdserver.ErrNotEnoughStartedMembers.Error() { - return err - } + err := e.RemoveSelf(ctxInfo.ctx) ctxInfo.cancel() time.Sleep(5 * time.Second) testutil.CleanupDataDir(e.config) + if err != nil && err.Error() != etcdserver.ErrNotEnoughStartedMembers.Error() { + return err + } return nil }, }, { - name: "Start etcd with an existing cluster", + name: "existing cluster", fields: fields{ config: generateTestConfig(), address: mustGetAddress(), @@ -322,13 +334,14 @@ func Test_UnitETCD_Start(t *testing.T) { }, teardown: func(e *ETCD, ctxInfo *contextInfo) error { // RemoveSelf will fail with a specific error, but it still does cleanup for testing purposes - if err := e.RemoveSelf(ctxInfo.ctx); err != nil && err.Error() != etcdserver.ErrNotEnoughStartedMembers.Error() { - return err - } + err := e.RemoveSelf(ctxInfo.ctx) ctxInfo.cancel() time.Sleep(5 * time.Second) testutil.CleanupDataDir(e.config) os.Remove(walDir(e.config)) + if err != nil && err.Error() != etcdserver.ErrNotEnoughStartedMembers.Error() { + return err + } return nil }, }, @@ -353,8 +366,478 @@ func Test_UnitETCD_Start(t *testing.T) { } if err := tt.teardown(e, &tt.fields.context); err != nil { t.Errorf("Teardown for ETCD.Start() failed = %v", err) + } + }) + } +} + +func Test_UnitETCD_Test(t *testing.T) { + type contextInfo struct { + ctx context.Context + cancel context.CancelFunc + } + type fields struct { + context contextInfo + client *clientv3.Client + config *config.Control + name string + address string + } + type args struct { + clientAccessInfo *clientaccess.Info + } + tests := []struct { + name string + fields fields + setup func(e *ETCD, ctxInfo *contextInfo) error + teardown func(e *ETCD, ctxInfo *contextInfo) error + wantErr bool + }{ + { + name: "no server running", + fields: fields{ + config: generateTestConfig(), + address: mustGetAddress(), + name: "default", + }, + setup: func(e *ETCD, ctxInfo *contextInfo) error { + ctxInfo.ctx, ctxInfo.cancel = context.WithCancel(context.Background()) + testutil.GenerateRuntime(e.config) + return e.startClient(ctxInfo.ctx) + }, + teardown: func(e *ETCD, ctxInfo *contextInfo) error { + ctxInfo.cancel() + time.Sleep(1 * time.Second) + testutil.CleanupDataDir(e.config) + return nil + }, + wantErr: true, + }, + { + name: "unreachable server", + fields: fields{ + config: generateTestConfig(), + address: mustGetAddress(), + name: "default", + }, + setup: func(e *ETCD, ctxInfo *contextInfo) error { + ctxInfo.ctx, ctxInfo.cancel = context.WithCancel(context.Background()) + testutil.GenerateRuntime(e.config) + e.config.Runtime.EtcdConfig.Endpoints = []string{"https://192.0.2.0:2379"} // RFC5737 + return e.startClient(ctxInfo.ctx) + }, + teardown: func(e *ETCD, ctxInfo *contextInfo) error { + ctxInfo.cancel() + time.Sleep(1 * time.Second) + testutil.CleanupDataDir(e.config) + return nil + }, + wantErr: true, + }, + { + name: "learner server", + fields: fields{ + config: generateTestConfig(), + address: mustGetAddress(), + name: "default", + }, + setup: func(e *ETCD, ctxInfo *contextInfo) error { + ctxInfo.ctx, ctxInfo.cancel = context.WithCancel(context.Background()) + testutil.GenerateRuntime(e.config) + if err := startMock(ctxInfo.ctx, e, true, false, false, time.Second); err != nil { + return err + } + return e.startClient(ctxInfo.ctx) + }, + teardown: func(e *ETCD, ctxInfo *contextInfo) error { + ctxInfo.cancel() + time.Sleep(1 * time.Second) + testutil.CleanupDataDir(e.config) + return nil + }, + wantErr: true, + }, + { + name: "corrupt server", + fields: fields{ + config: generateTestConfig(), + address: mustGetAddress(), + name: "default", + }, + setup: func(e *ETCD, ctxInfo *contextInfo) error { + ctxInfo.ctx, ctxInfo.cancel = context.WithCancel(context.Background()) + testutil.GenerateRuntime(e.config) + if err := startMock(ctxInfo.ctx, e, false, true, false, time.Second); err != nil { + return err + } + return e.startClient(ctxInfo.ctx) + }, + teardown: func(e *ETCD, ctxInfo *contextInfo) error { + ctxInfo.cancel() + time.Sleep(1 * time.Second) + testutil.CleanupDataDir(e.config) + return nil + }, + wantErr: true, + }, + { + name: "leaderless server", + fields: fields{ + config: generateTestConfig(), + address: mustGetAddress(), + name: "default", + }, + setup: func(e *ETCD, ctxInfo *contextInfo) error { + ctxInfo.ctx, ctxInfo.cancel = context.WithCancel(context.Background()) + testutil.GenerateRuntime(e.config) + if err := startMock(ctxInfo.ctx, e, false, false, true, time.Second); err != nil { + return err + } + return e.startClient(ctxInfo.ctx) + }, + teardown: func(e *ETCD, ctxInfo *contextInfo) error { + ctxInfo.cancel() + time.Sleep(1 * time.Second) + testutil.CleanupDataDir(e.config) + return nil + }, + wantErr: true, + }, + { + name: "normal server", + fields: fields{ + config: generateTestConfig(), + address: mustGetAddress(), + name: "default", + }, + setup: func(e *ETCD, ctxInfo *contextInfo) error { + ctxInfo.ctx, ctxInfo.cancel = context.WithCancel(context.Background()) + testutil.GenerateRuntime(e.config) + if err := startMock(ctxInfo.ctx, e, false, false, false, time.Second); err != nil { + return err + } + return e.startClient(ctxInfo.ctx) + }, + teardown: func(e *ETCD, ctxInfo *contextInfo) error { + ctxInfo.cancel() + time.Sleep(1 * time.Second) + testutil.CleanupDataDir(e.config) + return nil + }, + wantErr: false, + }, + { + name: "alarm on other server", + fields: fields{ + config: generateTestConfig(), + address: mustGetAddress(), + name: "default", + }, + setup: func(e *ETCD, ctxInfo *contextInfo) error { + ctxInfo.ctx, ctxInfo.cancel = context.WithCancel(context.Background()) + testutil.GenerateRuntime(e.config) + extraAlarm := &etcdserverpb.AlarmMember{MemberID: 2, Alarm: etcdserverpb.AlarmType_NOSPACE} + if err := startMock(ctxInfo.ctx, e, false, false, false, time.Second, extraAlarm); err != nil { + return err + } + return e.startClient(ctxInfo.ctx) + }, + teardown: func(e *ETCD, ctxInfo *contextInfo) error { + ctxInfo.cancel() + time.Sleep(1 * time.Second) + testutil.CleanupDataDir(e.config) + return nil + }, + wantErr: false, + }, + { + name: "slow defrag", + fields: fields{ + config: generateTestConfig(), + address: mustGetAddress(), + name: "default", + }, + setup: func(e *ETCD, ctxInfo *contextInfo) error { + ctxInfo.ctx, ctxInfo.cancel = context.WithCancel(context.Background()) + testutil.GenerateRuntime(e.config) + if err := startMock(ctxInfo.ctx, e, false, false, false, 40*time.Second); err != nil { + return err + } + return e.startClient(ctxInfo.ctx) + }, + teardown: func(e *ETCD, ctxInfo *contextInfo) error { + ctxInfo.cancel() + time.Sleep(1 * time.Second) + testutil.CleanupDataDir(e.config) + return nil + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + e := &ETCD{ + client: tt.fields.client, + config: tt.fields.config, + name: tt.fields.name, + address: tt.fields.address, + } + + if err := tt.setup(e, &tt.fields.context); err != nil { + t.Errorf("Setup for ETCD.Test() failed = %v", err) return } + start := time.Now() + err := e.Test(tt.fields.context.ctx) + duration := time.Now().Sub(start) + t.Logf("ETCD.Test() completed in %v with err=%v", duration, err) + if (err != nil) != tt.wantErr { + t.Errorf("ETCD.Test() error = %v, wantErr %v", err, tt.wantErr) + } + if err := tt.teardown(e, &tt.fields.context); err != nil { + t.Errorf("Teardown for ETCD.Test() failed = %v", err) + } }) } } + +// startMock starts up a mock etcd grpc service with canned responses +// that can be used to test specific scenarios. +func startMock(ctx context.Context, e *ETCD, isLearner, isCorrupt, noLeader bool, defragDelay time.Duration, extraAlarms ...*etcdserverpb.AlarmMember) error { + address := authority(getEndpoints(e.config)[0]) + // listen on endpoint and close listener on context cancel + listener, err := net.Listen("tcp", address) + if err != nil { + return err + } + + // set up tls if enabled + gopts := []grpc.ServerOption{} + if e.config.Datastore.ServerTLSConfig.CertFile != "" && e.config.Datastore.ServerTLSConfig.KeyFile != "" { + creds, err := credentials.NewServerTLSFromFile(e.config.Datastore.ServerTLSConfig.CertFile, e.config.Datastore.ServerTLSConfig.KeyFile) + if err != nil { + return err + } + gopts = append(gopts, grpc.Creds(creds)) + } + server := grpc.NewServer(gopts...) + + mock := &mockEtcd{ + e: e, + mu: &sync.RWMutex{}, + isLearner: isLearner, + isCorrupt: isCorrupt, + noLeader: noLeader, + defragDelay: defragDelay, + extraAlarms: extraAlarms, + } + + // register grpc services + etcdserverpb.RegisterKVServer(server, mock) + etcdserverpb.RegisterClusterServer(server, mock) + etcdserverpb.RegisterMaintenanceServer(server, mock) + + hsrv := health.NewServer() + hsrv.SetServingStatus("", healthpb.HealthCheckResponse_SERVING) + healthpb.RegisterHealthServer(server, hsrv) + + reflection.Register(server) + + // shutdown on context cancel + go func() { + <-ctx.Done() + server.GracefulStop() + listener.Close() + }() + + // start serving + go func() { + logrus.Infof("Mock etcd server starting on %s", listener.Addr()) + logrus.Infof("Mock etcd server exited: %v", server.Serve(listener)) + }() + + return nil +} + +type mockEtcd struct { + e *ETCD + mu *sync.RWMutex + calls map[string]int + isLearner bool + isCorrupt bool + noLeader bool + defragDelay time.Duration + extraAlarms []*etcdserverpb.AlarmMember +} + +// increment call counter for this function +func (m *mockEtcd) inc(call string) { + m.mu.Lock() + defer m.mu.Unlock() + if m.calls == nil { + m.calls = map[string]int{} + } + m.calls[call] = m.calls[call] + 1 +} + +// get call counter for this function +func (m *mockEtcd) get(call string) int { + m.mu.RLock() + defer m.mu.RUnlock() + return m.calls[call] +} + +// get alarm list +func (m *mockEtcd) alarms() []*etcdserverpb.AlarmMember { + alarms := m.extraAlarms + if m.get("alarm") < 2 { + // on the first check, return NOSPACE so that we can clear it after defragging + alarms = append(alarms, &etcdserverpb.AlarmMember{ + Alarm: etcdserverpb.AlarmType_NOSPACE, + MemberID: 1, + }) + } + if m.isCorrupt { + // return CORRUPT if so requested + alarms = append(alarms, &etcdserverpb.AlarmMember{ + Alarm: etcdserverpb.AlarmType_CORRUPT, + MemberID: 1, + }) + } + return alarms +} + +// KV mocks +func (m *mockEtcd) Range(context.Context, *etcdserverpb.RangeRequest) (*etcdserverpb.RangeResponse, error) { + m.inc("range") + return nil, unsupported("range") +} +func (m *mockEtcd) Put(context.Context, *etcdserverpb.PutRequest) (*etcdserverpb.PutResponse, error) { + m.inc("put") + return nil, unsupported("put") +} +func (m *mockEtcd) DeleteRange(context.Context, *etcdserverpb.DeleteRangeRequest) (*etcdserverpb.DeleteRangeResponse, error) { + m.inc("deleterange") + return nil, unsupported("deleterange") +} +func (m *mockEtcd) Txn(context.Context, *etcdserverpb.TxnRequest) (*etcdserverpb.TxnResponse, error) { + m.inc("txn") + return nil, unsupported("txn") +} +func (m *mockEtcd) Compact(context.Context, *etcdserverpb.CompactionRequest) (*etcdserverpb.CompactionResponse, error) { + m.inc("compact") + return nil, unsupported("compact") +} + +// Maintenance mocks +func (m *mockEtcd) Alarm(ctx context.Context, r *etcdserverpb.AlarmRequest) (*etcdserverpb.AlarmResponse, error) { + m.inc("alarm") + res := &etcdserverpb.AlarmResponse{ + Header: &etcdserverpb.ResponseHeader{ + MemberId: 1, + }, + } + if r.Action == etcdserverpb.AlarmRequest_GET { + res.Alarms = m.alarms() + } + return res, nil +} +func (m *mockEtcd) Status(context.Context, *etcdserverpb.StatusRequest) (*etcdserverpb.StatusResponse, error) { + m.inc("status") + res := &etcdserverpb.StatusResponse{ + Header: &etcdserverpb.ResponseHeader{ + MemberId: 1, + }, + Leader: 1, + Version: "v3.5.0-mock0", + DbSize: 1024, + DbSizeInUse: 512, + IsLearner: m.isLearner, + } + if m.noLeader { + res.Leader = 0 + res.Errors = append(res.Errors, etcdserver.ErrNoLeader.Error()) + } + for _, a := range m.alarms() { + res.Errors = append(res.Errors, a.String()) + } + return res, nil +} +func (m *mockEtcd) Defragment(ctx context.Context, r *etcdserverpb.DefragmentRequest) (*etcdserverpb.DefragmentResponse, error) { + m.inc("defragment") + // delay defrag response by configured time, or until the request is cancelled + select { + case <-ctx.Done(): + case <-time.After(m.defragDelay): + } + return &etcdserverpb.DefragmentResponse{ + Header: &etcdserverpb.ResponseHeader{ + MemberId: 1, + }, + }, nil +} +func (m *mockEtcd) Hash(context.Context, *etcdserverpb.HashRequest) (*etcdserverpb.HashResponse, error) { + m.inc("hash") + return nil, unsupported("hash") +} +func (m *mockEtcd) HashKV(context.Context, *etcdserverpb.HashKVRequest) (*etcdserverpb.HashKVResponse, error) { + m.inc("hashkv") + return nil, unsupported("hashkv") +} +func (m *mockEtcd) Snapshot(*etcdserverpb.SnapshotRequest, etcdserverpb.Maintenance_SnapshotServer) error { + m.inc("snapshot") + return unsupported("snapshot") +} +func (m *mockEtcd) MoveLeader(context.Context, *etcdserverpb.MoveLeaderRequest) (*etcdserverpb.MoveLeaderResponse, error) { + m.inc("moveleader") + return nil, unsupported("moveleader") +} +func (m *mockEtcd) Downgrade(context.Context, *etcdserverpb.DowngradeRequest) (*etcdserverpb.DowngradeResponse, error) { + m.inc("downgrade") + return nil, unsupported("downgrade") +} + +// Cluster mocks +func (m *mockEtcd) MemberAdd(context.Context, *etcdserverpb.MemberAddRequest) (*etcdserverpb.MemberAddResponse, error) { + m.inc("memberadd") + return nil, unsupported("memberadd") +} +func (m *mockEtcd) MemberRemove(context.Context, *etcdserverpb.MemberRemoveRequest) (*etcdserverpb.MemberRemoveResponse, error) { + m.inc("memberremove") + return nil, etcdserver.ErrNotEnoughStartedMembers +} +func (m *mockEtcd) MemberUpdate(context.Context, *etcdserverpb.MemberUpdateRequest) (*etcdserverpb.MemberUpdateResponse, error) { + m.inc("memberupdate") + return nil, unsupported("memberupdate") +} +func (m *mockEtcd) MemberList(context.Context, *etcdserverpb.MemberListRequest) (*etcdserverpb.MemberListResponse, error) { + m.inc("memberlist") + scheme := "http" + if m.e.config.Datastore.ServerTLSConfig.CertFile != "" { + scheme = "https" + } + + return &etcdserverpb.MemberListResponse{ + Header: &etcdserverpb.ResponseHeader{ + MemberId: 1, + }, + Members: []*etcdserverpb.Member{ + { + ID: 1, + Name: m.e.name, + IsLearner: m.isLearner, + ClientURLs: []string{scheme + "://127.0.0.1:2379"}, + PeerURLs: []string{scheme + "://" + m.e.address + ":2380"}, + }, + }, + }, nil +} + +func (m *mockEtcd) MemberPromote(context.Context, *etcdserverpb.MemberPromoteRequest) (*etcdserverpb.MemberPromoteResponse, error) { + m.inc("memberpromote") + return nil, unsupported("memberpromote") +} + +func unsupported(field string) error { + return status.New(codes.Unimplemented, field+" is not implemented").Err() +} diff --git a/tests/unit.go b/tests/unit.go index ee76af03851a..5bc40cea3b55 100644 --- a/tests/unit.go +++ b/tests/unit.go @@ -54,6 +54,10 @@ func GenerateRuntime(cnf *config.Control) error { deps.CreateRuntimeCertFiles(cnf) + cnf.Datastore.ServerTLSConfig.CAFile = cnf.Runtime.ETCDServerCA + cnf.Datastore.ServerTLSConfig.CertFile = cnf.Runtime.ServerETCDCert + cnf.Datastore.ServerTLSConfig.KeyFile = cnf.Runtime.ServerETCDKey + return deps.GenServerDeps(cnf) } From 90980629d444b0d285d60e500dcfe90bcf2088f5 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Mon, 4 Nov 2024 21:48:45 +0000 Subject: [PATCH 4/9] Set kine EmulatedETCDVersion from embedded etcd version Signed-off-by: Brad Davidson (cherry picked from commit bc60ff79f6ee5fc784a8bbcaab9b502bc304b2f6) Signed-off-by: Brad Davidson --- pkg/cli/server/server.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/cli/server/server.go b/pkg/cli/server/server.go index 379a29fda752..fa8e14ee061e 100644 --- a/pkg/cli/server/server.go +++ b/pkg/cli/server/server.go @@ -32,6 +32,7 @@ import ( "github.com/rancher/wrangler/v3/pkg/signals" "github.com/sirupsen/logrus" "github.com/urfave/cli" + etcdversion "go.etcd.io/etcd/api/v3/version" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilnet "k8s.io/apimachinery/pkg/util/net" kubeapiserverflag "k8s.io/component-base/cli/flag" @@ -147,6 +148,7 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont serverConfig.ControlConfig.ExtraSchedulerAPIArgs = cfg.ExtraSchedulerArgs serverConfig.ControlConfig.ClusterDomain = cfg.ClusterDomain serverConfig.ControlConfig.Datastore.NotifyInterval = 5 * time.Second + serverConfig.ControlConfig.Datastore.EmulatedETCDVersion = etcdversion.Version serverConfig.ControlConfig.Datastore.Endpoint = cfg.DatastoreEndpoint serverConfig.ControlConfig.Datastore.BackendTLSConfig.CAFile = cfg.DatastoreCAFile serverConfig.ControlConfig.Datastore.BackendTLSConfig.CertFile = cfg.DatastoreCertFile From bb2673c3be1c8ba8e2ceb0334d7be4e6649151f8 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Mon, 4 Nov 2024 22:03:03 +0000 Subject: [PATCH 5/9] Fix git-sha warning in etcd startup logs Signed-off-by: Brad Davidson (cherry picked from commit b67249ec777d738d25eb888a65007bd185d46ac8) Signed-off-by: Brad Davidson --- scripts/build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build b/scripts/build index 43e4614fc43e..257b20ec27f0 100755 --- a/scripts/build +++ b/scripts/build @@ -52,7 +52,7 @@ VERSIONFLAGS=" -X ${PKG_CRI_DOCKERD}/cmd/version.GitCommit=HEAD -X ${PKG_CRI_DOCKERD}/cmd/version.BuildTime=${buildDate} - -X ${PKG_ETCD}/api/version.GitSHA=HEAD + -X ${PKG_ETCD}/api/v3/version.GitSHA=HEAD " if [ -n "${DEBUG}" ]; then GCFLAGS="-N -l" From ec407eaffe8311dd9f67475dedd079c9e8cf5f28 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Wed, 30 Oct 2024 20:55:40 +0000 Subject: [PATCH 6/9] Add nonroot-devices flag to agent CLI Add new flag that is passed through to the device_ownership_from_security_context parameter in the containerd CRI config. This is not possible to change without providing a complete custom containerd.toml template so we should add a flag for it. Signed-off-by: Brad Davidson (cherry picked from commit 56fb3b09917be371709a7c94b44025f3b730a2dc) Signed-off-by: Brad Davidson --- pkg/agent/config/config.go | 1 + pkg/agent/containerd/config_linux.go | 1 + pkg/agent/templates/templates.go | 1 + pkg/agent/templates/templates_linux.go | 1 + pkg/cli/cmds/agent.go | 7 +++++++ pkg/cli/cmds/server.go | 1 + pkg/daemons/config/types.go | 27 +++++++++++++------------- 7 files changed, 26 insertions(+), 13 deletions(-) diff --git a/pkg/agent/config/config.go b/pkg/agent/config/config.go index f1850c5d105c..4883297713ad 100644 --- a/pkg/agent/config/config.go +++ b/pkg/agent/config/config.go @@ -603,6 +603,7 @@ func get(ctx context.Context, envInfo *cmds.Agent, proxy proxy.Proxy) (*config.N nodeConfig.Containerd.Log = filepath.Join(envInfo.DataDir, "agent", "containerd", "containerd.log") nodeConfig.Containerd.Registry = filepath.Join(envInfo.DataDir, "agent", "etc", "containerd", "certs.d") nodeConfig.Containerd.NoDefault = envInfo.ContainerdNoDefault + nodeConfig.Containerd.NonrootDevices = envInfo.ContainerdNonrootDevices nodeConfig.Containerd.Debug = envInfo.Debug applyContainerdStateAndAddress(nodeConfig) applyCRIDockerdAddress(nodeConfig) diff --git a/pkg/agent/containerd/config_linux.go b/pkg/agent/containerd/config_linux.go index 5bd7df655a62..a18328b3ecee 100644 --- a/pkg/agent/containerd/config_linux.go +++ b/pkg/agent/containerd/config_linux.go @@ -73,6 +73,7 @@ func SetupContainerdConfig(cfg *config.Node) error { SystemdCgroup: cfg.AgentConfig.Systemd, IsRunningInUserNS: isRunningInUserNS, EnableUnprivileged: kernel.CheckKernelVersion(4, 11, 0), + NonrootDevices: cfg.Containerd.NonrootDevices, PrivateRegistryConfig: cfg.AgentConfig.Registry, ExtraRuntimes: extraRuntimes, Program: version.Program, diff --git a/pkg/agent/templates/templates.go b/pkg/agent/templates/templates.go index 623eba4aae17..9a66b9007472 100644 --- a/pkg/agent/templates/templates.go +++ b/pkg/agent/templates/templates.go @@ -23,6 +23,7 @@ type ContainerdConfig struct { IsRunningInUserNS bool EnableUnprivileged bool NoDefaultEndpoint bool + NonrootDevices bool PrivateRegistryConfig *registries.Registry ExtraRuntimes map[string]ContainerdRuntimeConfig Program string diff --git a/pkg/agent/templates/templates_linux.go b/pkg/agent/templates/templates_linux.go index c064f6fcb4cd..dffce1737ccb 100644 --- a/pkg/agent/templates/templates_linux.go +++ b/pkg/agent/templates/templates_linux.go @@ -19,6 +19,7 @@ version = 2 enable_selinux = {{ .NodeConfig.SELinux }} enable_unprivileged_ports = {{ .EnableUnprivileged }} enable_unprivileged_icmp = {{ .EnableUnprivileged }} + device_ownership_from_security_context = {{ .NonrootDevices }} {{- if .DisableCgroup}} disable_cgroup = true diff --git a/pkg/cli/cmds/agent.go b/pkg/cli/cmds/agent.go index 751c6c31b144..a142cfbbe788 100644 --- a/pkg/cli/cmds/agent.go +++ b/pkg/cli/cmds/agent.go @@ -28,6 +28,7 @@ type Agent struct { Snapshotter string Docker bool ContainerdNoDefault bool + ContainerdNonrootDevices bool ContainerRuntimeEndpoint string DefaultRuntime string ImageServiceEndpoint string @@ -228,6 +229,11 @@ var ( Usage: "(agent/containerd) Disables containerd's fallback default registry endpoint when a mirror is configured for that registry", Destination: &AgentConfig.ContainerdNoDefault, } + NonrootDevicesFlag = &cli.BoolFlag{ + Name: "nonroot-devices", + Usage: "(agent/containerd) Allows non-root pods to access devices by setting device_ownership_from_security_context=true in the containerd CRI config", + Destination: &AgentConfig.ContainerdNonrootDevices, + } EnablePProfFlag = &cli.BoolFlag{ Name: "enable-pprof", Usage: "(experimental) Enable pprof endpoint on supervisor port", @@ -291,6 +297,7 @@ func NewAgentCommand(action func(ctx *cli.Context) error) cli.Command { SnapshotterFlag, PrivateRegistryFlag, DisableDefaultRegistryEndpointFlag, + NonrootDevicesFlag, AirgapExtraRegistryFlag, NodeIPFlag, BindAddressFlag, diff --git a/pkg/cli/cmds/server.go b/pkg/cli/cmds/server.go index 91dafa30995d..000adb9cebbd 100644 --- a/pkg/cli/cmds/server.go +++ b/pkg/cli/cmds/server.go @@ -527,6 +527,7 @@ var ServerFlags = []cli.Flag{ DefaultRuntimeFlag, ImageServiceEndpointFlag, DisableDefaultRegistryEndpointFlag, + NonrootDevicesFlag, PauseImageFlag, SnapshotterFlag, PrivateRegistryFlag, diff --git a/pkg/daemons/config/types.go b/pkg/daemons/config/types.go index 659059aee26d..f6336a2ba251 100644 --- a/pkg/daemons/config/types.go +++ b/pkg/daemons/config/types.go @@ -78,19 +78,20 @@ type EtcdS3 struct { } type Containerd struct { - Address string - Log string - Root string - State string - Config string - Opt string - Template string - BlockIOConfig string - RDTConfig string - Registry string - NoDefault bool - SELinux bool - Debug bool + Address string + Log string + Root string + State string + Config string + Opt string + Template string + BlockIOConfig string + RDTConfig string + Registry string + NoDefault bool + NonrootDevices bool + SELinux bool + Debug bool } type CRIDockerd struct { From 0269d10fe643fecfeaeb209e7262975ad5aa00c1 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Mon, 4 Nov 2024 20:55:39 +0000 Subject: [PATCH 7/9] Bump runc to v1.2.1 Signed-off-by: Brad Davidson (cherry picked from commit 917761ce54524b2156fd31ae33350ff19080b981) Signed-off-by: Brad Davidson --- go.mod | 9 +++++---- go.sum | 29 +++++++++++------------------ 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/go.mod b/go.mod index 32134862c961..ffd30fd36cd3 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.22.5 replace ( github.com/Microsoft/hcsshim => github.com/Microsoft/hcsshim v0.11.7 github.com/Mirantis/cri-dockerd => github.com/k3s-io/cri-dockerd v0.3.15-k3s1.31-3 //v1.31 + github.com/cilium/ebpf => github.com/cilium/ebpf v0.12.3 github.com/cloudnativelabs/kube-router/v2 => github.com/k3s-io/kube-router/v2 v2.2.1 github.com/containerd/containerd => github.com/k3s-io/containerd v1.7.22-k3s1 github.com/containerd/imgcrypt => github.com/containerd/imgcrypt v1.1.11 @@ -15,7 +16,7 @@ replace ( github.com/golang/protobuf => github.com/golang/protobuf v1.5.4 github.com/googleapis/gax-go/v2 => github.com/googleapis/gax-go/v2 v2.12.0 github.com/open-policy-agent/opa => github.com/open-policy-agent/opa v0.59.0 // github.com/Microsoft/hcsshim using bad version v0.42.2 - github.com/opencontainers/runc => github.com/k3s-io/runc v1.1.14-k3s1 + github.com/opencontainers/runc => github.com/k3s-io/runc v1.2.1-k3s1 github.com/opencontainers/selinux => github.com/opencontainers/selinux v1.11.0 github.com/prometheus/client_golang => github.com/prometheus/client_golang v1.19.1 github.com/prometheus/common => github.com/prometheus/common v0.55.0 @@ -201,8 +202,8 @@ require ( github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/chai2010/gettext-go v1.0.2 // indirect - github.com/checkpoint-restore/go-criu/v5 v5.3.0 // indirect - github.com/cilium/ebpf v0.11.0 // indirect + github.com/checkpoint-restore/go-criu/v6 v6.3.0 // indirect + github.com/cilium/ebpf v0.16.0 // indirect github.com/container-storage-interface/spec v1.9.0 // indirect github.com/containerd/btrfs/v2 v2.0.0 // indirect github.com/containerd/cgroups v1.1.0 // indirect @@ -227,7 +228,7 @@ require ( github.com/coreos/go-oidc v2.2.1+incompatible // indirect github.com/coreos/go-semver v0.3.1 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect - github.com/cyphar/filepath-securejoin v0.2.5 // indirect + github.com/cyphar/filepath-securejoin v0.3.4 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/daviddengcn/go-colortext v1.0.0 // indirect github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c // indirect diff --git a/go.sum b/go.sum index 084e4a04d04e..2e6d4514b749 100644 --- a/go.sum +++ b/go.sum @@ -352,8 +352,8 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chai2010/gettext-go v1.0.2 h1:1Lwwip6Q2QGsAdl/ZKPCwTe9fe0CjlUbqj5bFNSjIRk= github.com/chai2010/gettext-go v1.0.2/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHeQQ+5AjwawxA= -github.com/checkpoint-restore/go-criu/v5 v5.3.0 h1:wpFFOoomK3389ue2lAb0Boag6XPht5QYpipxmSNL4d8= -github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E= +github.com/checkpoint-restore/go-criu/v6 v6.3.0 h1:mIdrSO2cPNWQY1truPg6uHLXyKHk3Z5Odx4wjKOASzA= +github.com/checkpoint-restore/go-criu/v6 v6.3.0/go.mod h1:rrRTN/uSwY2X+BPRl/gkulo9gsKOSAeVp9/K2tv7xZI= github.com/chromedp/cdproto v0.0.0-20230802225258-3cf4e6d46a89/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs= github.com/chromedp/chromedp v0.9.2/go.mod h1:LkSXJKONWTCHAfQasKFUZI+mxqS4tZqhmtGzzhLsnLs= github.com/chromedp/sysutil v1.0.0/go.mod h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww= @@ -363,12 +363,8 @@ github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5P github.com/chzyer/readline v1.5.1/go.mod h1:Eh+b79XXUwfKfcPLepksvw2tcLE/Ct21YObkaSkeBlk= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8= -github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs= -github.com/cilium/ebpf v0.4.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs= -github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA= -github.com/cilium/ebpf v0.9.1/go.mod h1:+OhNOIXx/Fnu1IE8bJz2dzOA+VSfyTfdNUVdlQnxUFY= -github.com/cilium/ebpf v0.11.0 h1:V8gS/bTCCjX9uUnkUFUpPsksM8n1lXBAvHcpiFk1X2Y= -github.com/cilium/ebpf v0.11.0/go.mod h1:WE7CZAnqOL2RouJ4f1uyNhqr2P4CCvXFIqdRDUgWsVs= +github.com/cilium/ebpf v0.12.3 h1:8ht6F9MquybnY97at+VDZb3eQQr8ev79RueWeVaEcG4= +github.com/cilium/ebpf v0.12.3/go.mod h1:TctK1ivibvI3znr66ljgi4hqOT8EYQjz1KWBfb1UVgM= github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20220314180256-7f1daf1720fc/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= @@ -483,9 +479,8 @@ github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= -github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= -github.com/cyphar/filepath-securejoin v0.2.5 h1:6iR5tXJ/e6tJZzzdMc1km3Sa7RRIVBKAK32O2s7AYfo= -github.com/cyphar/filepath-securejoin v0.2.5/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= +github.com/cyphar/filepath-securejoin v0.3.4 h1:VBWugsJh2ZxJmLFSM06/0qzQyiQX2Qs0ViKrUAcqdZ8= +github.com/cyphar/filepath-securejoin v0.3.4/go.mod h1:8s/MCNJREmFK0H02MF6Ihv1nakJe4L/w3WZLHNkvlYM= github.com/d2g/dhcp4 v0.0.0-20170904100407-a1d1b6c41b1c/go.mod h1:Ct2BUK8SB0YC1SMSibvLzxjeJLnrYEVLULFNiHY9YfQ= github.com/d2g/dhcp4client v1.0.0/go.mod h1:j0hNfjhrt2SxUOw55nL0ATM/z4Yt3t2Kd1mW34z5W5s= github.com/d2g/dhcp4server v0.0.0-20181031114812-7d4a0a7f59a5/go.mod h1:Eo87+Kg/IX2hfWJfwxMzLyuSZyxSoAug2nGa1G2QAi8= @@ -582,8 +577,6 @@ github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHqu github.com/foxcpp/go-mockdns v1.0.0/go.mod h1:lgRN6+KxQBawyIghpnl5CezHFGS9VLzvtVlwxvzXTQ4= github.com/francoispqt/gojay v1.2.13 h1:d2m3sFjloqoIUQU3TsHBgj6qg/BVGlTBeHDUmyJnXKk= github.com/francoispqt/gojay v1.2.13/go.mod h1:ehT5mTG4ua4581f1++1WLG0vPdaA9HaiDsoyrBGkyDY= -github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= -github.com/frankban/quicktest v1.14.0/go.mod h1:NeW+ay9A/U67EYXNFA1nPE8e/tnQv/09mUdL/ijj8og= github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA= github.com/frankban/quicktest v1.14.5/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= @@ -1050,8 +1043,8 @@ github.com/k3s-io/kubernetes/staging/src/k8s.io/mount-utils v1.31.2-k3s1 h1:Om9y github.com/k3s-io/kubernetes/staging/src/k8s.io/mount-utils v1.31.2-k3s1/go.mod h1:p5r0u2M9KzooTgHDz4zRsUt02y4Yx7/5uPwgr0nSGqg= github.com/k3s-io/kubernetes/staging/src/k8s.io/pod-security-admission v1.31.2-k3s1 h1:JPJvGUf0w7SvdoC06qiB795NfCAtoKWjQxxMqSCQBUs= github.com/k3s-io/kubernetes/staging/src/k8s.io/pod-security-admission v1.31.2-k3s1/go.mod h1:nOJes2FVv6qZXUU3CGubLnloPJVV4rZ+jm0bLRCKKOM= -github.com/k3s-io/runc v1.1.14-k3s1 h1:PcwbBuIfjI9A0T1fq7XIdIxqYHWarDlRln7QsppQQmQ= -github.com/k3s-io/runc v1.1.14-k3s1/go.mod h1:E4C2z+7BxR7GHXp0hAY53mek+x49X1LjPNeMTfRGvOA= +github.com/k3s-io/runc v1.2.1-k3s1 h1:74ZffmoNVdX+jO+XYLv0iU/A9Yse1DZ2slZog7gNDgE= +github.com/k3s-io/runc v1.2.1-k3s1/go.mod h1:/PXzF0h531HTMsYQnmxXkBD7YaGShm/2zcRB79dksUc= github.com/k3s-io/spegel v0.0.23-0.20240516234953-f3d2c4072314 h1:TrZb/yM0OtBuifPXlKaOfcxpJqzakA8+KsoO4c69ZLM= github.com/k3s-io/spegel v0.0.23-0.20240516234953-f3d2c4072314/go.mod h1:bMHfSjj1+Zf5VITCZe/wLjuni6rYAj/DjPU/kIVnhfA= github.com/karrick/godirwalk v1.17.0 h1:b4kY7nqDdioR/6qnbHQyDvmA17u5G1cZ6J+CZXwSWoI= @@ -1226,8 +1219,8 @@ github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQ github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0GqbN2Wy8c= github.com/moby/spdystream v0.4.0 h1:Vy79D6mHeJJjiPdFEL2yku1kl0chZpJfZcPpb16BRl8= github.com/moby/spdystream v0.4.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI= -github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU= github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI= +github.com/moby/sys/mountinfo v0.7.1/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI= github.com/moby/sys/mountinfo v0.7.2 h1:1shs6aH5s4o5H2zQLn796ADW1wMrIwHsyJ2v9KouLrg= github.com/moby/sys/mountinfo v0.7.2/go.mod h1:1YOa8w8Ih7uW0wALDUgT1dTTSBrZ+HiBLGws92L2RU4= github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo= @@ -1506,7 +1499,6 @@ github.com/ruudk/golang-pdf417 v0.0.0-20201230142125-a7e3863a1245/go.mod h1:pQAZ github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/safchain/ethtool v0.2.0/go.mod h1:WkKB1DnNtvsMlDmQ50sgwowDJV/hGbJSOvJoEXs1AJQ= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= -github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= github.com/seccomp/libseccomp-golang v0.10.0 h1:aA4bp+/Zzi0BnWZ2F1wgNBs5gTpm+na2rWM6M9YjLpY= github.com/seccomp/libseccomp-golang v0.10.0/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= @@ -1571,6 +1563,7 @@ github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkU github.com/spf13/cobra v0.0.2-0.20171109065643-2da4a54c5cee/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= github.com/spf13/cobra v1.1.3/go.mod h1:pGADOWyqRD/YMrPZigI/zbliZ2wVD/23d+is3pSWzOo= +github.com/spf13/cobra v1.5.0/go.mod h1:dWXEIy2H428czQCjInthrTRUg7yKbok+2Qi/yBIJoUM= github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY= github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= @@ -1623,7 +1616,6 @@ github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 h1:6fotK7 github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75/go.mod h1:KO6IkyS8Y3j8OdNO85qEYBsRPuteD+YciPomcXdrMnk= github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= -github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/urfave/cli v1.22.4/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/urfave/cli v1.22.10/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= @@ -1826,6 +1818,7 @@ golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= +golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= golang.org/x/exp v0.0.0-20230515195305-f3d0a9c9a5cc/go.mod h1:V1LtkGg67GoY2N1AnLN78QLrzxkLyJw7RJb1gzOOz9w= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= From 708454bd8914f001b8a0dac03090f758ec7ed9f9 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Tue, 5 Nov 2024 21:03:50 +0000 Subject: [PATCH 8/9] Fix MustFindString returning override flags on external CLI commands External CLI actions cannot short-circuit on --help or --version, so we cannot skip loading the config file if these flags are present when running these wrapped commands. The behavior of just returning the override flag name instead of the requested flag value was breaking data-dir lookup when running wrapped commands. Signed-off-by: Brad Davidson (cherry picked from commit ff5c633fe7a2422ed2aa18ce8cdf52f758433873) Signed-off-by: Brad Davidson --- cmd/k3s/main.go | 11 ++++---- pkg/configfilearg/defaultparser.go | 24 ++++++++++++++--- pkg/configfilearg/defaultparser_test.go | 34 ++++++++++++++++++++----- 3 files changed, 55 insertions(+), 14 deletions(-) diff --git a/cmd/k3s/main.go b/cmd/k3s/main.go index 52d5cefbf62e..90eae7268eab 100644 --- a/cmd/k3s/main.go +++ b/cmd/k3s/main.go @@ -8,6 +8,7 @@ import ( "os" "os/exec" "path/filepath" + "slices" "strconv" "strings" @@ -27,6 +28,7 @@ import ( ) var criDefaultConfigPath = "/etc/crictl.yaml" +var externalCLIActions = []string{"crictl", "ctr", "kubectl"} // main entrypoint for the k3s multicall binary func main() { @@ -105,7 +107,7 @@ func findDebug(args []string) bool { if debug { return debug } - debug, _ = strconv.ParseBool(configfilearg.MustFindString(args, "debug")) + debug, _ = strconv.ParseBool(configfilearg.MustFindString(args, "debug", externalCLIActions...)) return debug } @@ -125,7 +127,7 @@ func findDataDir(args []string) string { if dataDir != "" { return dataDir } - dataDir = configfilearg.MustFindString(args, "data-dir") + dataDir = configfilearg.MustFindString(args, "data-dir", externalCLIActions...) if d, err := datadir.Resolve(dataDir); err == nil { dataDir = d } else { @@ -143,7 +145,7 @@ func findPreferBundledBin(args []string) bool { fs.SetOutput(io.Discard) fs.BoolVar(&preferBundledBin, "prefer-bundled-bin", false, "Prefer bundled binaries") - preferRes := configfilearg.MustFindString(args, "prefer-bundled-bin") + preferRes := configfilearg.MustFindString(args, "prefer-bundled-bin", externalCLIActions...) if preferRes != "" { preferBundledBin, _ = strconv.ParseBool(preferRes) } @@ -158,8 +160,7 @@ func findPreferBundledBin(args []string) bool { // it returns false so that standard CLI wrapping can occur. func runCLIs(dataDir string) bool { progName := filepath.Base(os.Args[0]) - switch progName { - case "crictl", "ctr", "kubectl": + if slices.Contains(externalCLIActions, progName) { if err := externalCLI(progName, dataDir, os.Args[1:]); err != nil && !errors.Is(err, context.Canceled) { logrus.Fatal(err) } diff --git a/pkg/configfilearg/defaultparser.go b/pkg/configfilearg/defaultparser.go index b9b6d04fca2c..85d553bc08e8 100644 --- a/pkg/configfilearg/defaultparser.go +++ b/pkg/configfilearg/defaultparser.go @@ -1,6 +1,8 @@ package configfilearg import ( + "slices" + "github.com/k3s-io/k3s/pkg/cli/cmds" "github.com/k3s-io/k3s/pkg/version" "github.com/sirupsen/logrus" @@ -23,15 +25,31 @@ func MustParse(args []string) []string { return result } -func MustFindString(args []string, target string) string { +func MustFindString(args []string, target string, commandsWithoutOverride ...string) string { + overrideFlags := []string{"--help", "-h", "--version", "-v"} + // Check to see if the command or subcommand being executed supports override flags. + // Some subcommands such as `k3s ctr` or just `ctr` need to be extracted out even to + // provide version or help text, and we cannot short-circuit loading the config file. For + // these commands, treat failure to load the config file as a warning instead of a fatal. + if len(args) > 0 && args[0] == version.Program { + args = args[1:] + } + if len(args) > 0 && slices.Contains(commandsWithoutOverride, args[0]) { + overrideFlags = nil + } + parser := &Parser{ - OverrideFlags: []string{"--help", "-h", "--version", "-v"}, + OverrideFlags: overrideFlags, EnvName: version.ProgramUpper + "_CONFIG_FILE", DefaultConfig: "/etc/rancher/" + version.Program + "/config.yaml", } result, err := parser.FindString(args, target) if err != nil { - logrus.Fatal(err) + if len(overrideFlags) > 0 { + logrus.Fatal(err) + } else { + logrus.Warn(err) + } } return result } diff --git a/pkg/configfilearg/defaultparser_test.go b/pkg/configfilearg/defaultparser_test.go index 8ae8decc26fa..92002a8cb04a 100644 --- a/pkg/configfilearg/defaultparser_test.go +++ b/pkg/configfilearg/defaultparser_test.go @@ -85,7 +85,7 @@ func Test_UnitMustFindString(t *testing.T) { }{ { name: "Target not found in config file", - args: []string{"--foo", "bar"}, + args: []string{"k3s", "--foo", "bar"}, target: "token", want: "", @@ -95,7 +95,7 @@ func Test_UnitMustFindString(t *testing.T) { }, { name: "Target found in config file", - args: []string{"--foo", "bar"}, + args: []string{"k3s", "--foo", "bar"}, target: "token", want: "12345", @@ -104,11 +104,31 @@ func Test_UnitMustFindString(t *testing.T) { teardown: func() error { return os.Unsetenv("K3S_CONFIG_FILE") }, }, { - name: "Override flag found, function is short-circuited", - args: []string{"--foo", "bar", "-h"}, + name: "Override flag is returned if found", + args: []string{"k3s", "--foo", "bar", "--version"}, target: "token", - want: "-h", + want: "--version", + + setup: func() error { return os.Setenv("K3S_CONFIG_FILE", "./testdata/defaultdata.yaml") }, + teardown: func() error { return os.Unsetenv("K3S_CONFIG_FILE") }, + }, + { + name: "Override flag is not returned for specific subcommands", + args: []string{"k3s", "ctr", "--foo", "bar", "--version"}, + target: "token", + + want: "12345", + + setup: func() error { return os.Setenv("K3S_CONFIG_FILE", "./testdata/defaultdata.yaml") }, + teardown: func() error { return os.Unsetenv("K3S_CONFIG_FILE") }, + }, + { + name: "Override flag is not returned for specific subcommands", + args: []string{"kubectl", "--foo", "bar", "--help"}, + target: "token", + + want: "12345", setup: func() error { return os.Setenv("K3S_CONFIG_FILE", "./testdata/defaultdata.yaml") }, teardown: func() error { return os.Unsetenv("K3S_CONFIG_FILE") }, @@ -121,7 +141,9 @@ func Test_UnitMustFindString(t *testing.T) { t.Errorf("Setup for MustFindString() failed = %v", err) return } - if got := MustFindString(tt.args, tt.target); got != tt.want { + got := MustFindString(tt.args, tt.target, "crictl", "ctr", "kubectl") + t.Logf("MustFindString(%+v, %+v) = %s", tt.args, tt.target, got) + if got != tt.want { t.Errorf("MustFindString() = %+v\nWant = %+v", got, tt.want) } }) From 749f9f554fd28ec6100eff2fab3388d1a1b7fe2e Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Wed, 6 Nov 2024 01:05:44 +0000 Subject: [PATCH 9/9] Bump containerd to v1.7.23-k3s1 Signed-off-by: Brad Davidson (cherry picked from commit 9c32f838191fd1552322a0d4441a4d85e6efeea7) Signed-off-by: Brad Davidson --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index ffd30fd36cd3..0ce8e5cc7640 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ replace ( github.com/Mirantis/cri-dockerd => github.com/k3s-io/cri-dockerd v0.3.15-k3s1.31-3 //v1.31 github.com/cilium/ebpf => github.com/cilium/ebpf v0.12.3 github.com/cloudnativelabs/kube-router/v2 => github.com/k3s-io/kube-router/v2 v2.2.1 - github.com/containerd/containerd => github.com/k3s-io/containerd v1.7.22-k3s1 + github.com/containerd/containerd => github.com/k3s-io/containerd v1.7.23-k3s1 github.com/containerd/imgcrypt => github.com/containerd/imgcrypt v1.1.11 github.com/distribution/reference => github.com/distribution/reference v0.5.0 github.com/docker/distribution => github.com/docker/distribution v2.8.3+incompatible @@ -210,7 +210,7 @@ require ( github.com/containerd/console v1.0.4 // indirect github.com/containerd/containerd/api v1.8.0-rc.3 // indirect github.com/containerd/continuity v0.4.3 // indirect - github.com/containerd/errdefs v0.1.0 // indirect + github.com/containerd/errdefs v0.3.0 // indirect github.com/containerd/fifo v1.1.0 // indirect github.com/containerd/go-cni v1.1.10 // indirect github.com/containerd/go-runc v1.1.0 // indirect diff --git a/go.sum b/go.sum index 2e6d4514b749..b290beab9041 100644 --- a/go.sum +++ b/go.sum @@ -399,8 +399,8 @@ github.com/containerd/continuity v0.0.0-20210208174643-50096c924a4e/go.mod h1:EX github.com/containerd/continuity v0.3.0/go.mod h1:wJEAIwKOm/pBZuBd0JmeTvnLquTB1Ag8espWhkykbPM= github.com/containerd/continuity v0.4.3 h1:6HVkalIp+2u1ZLH1J/pYX2oBVXlJZvh1X1A7bEZ9Su8= github.com/containerd/continuity v0.4.3/go.mod h1:F6PTNCKepoxEaXLQp3wDAjygEnImnZ/7o4JzpodfroQ= -github.com/containerd/errdefs v0.1.0 h1:m0wCRBiu1WJT/Fr+iOoQHMQS/eP5myQ8lCv4Dz5ZURM= -github.com/containerd/errdefs v0.1.0/go.mod h1:YgWiiHtLmSeBrvpw+UfPijzbLaB77mEG1WwJTDETIV0= +github.com/containerd/errdefs v0.3.0 h1:FSZgGOeK4yuT/+DnF07/Olde/q4KBoMsaamhXxIMDp4= +github.com/containerd/errdefs v0.3.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= github.com/containerd/fifo v1.0.0/go.mod h1:ocF/ME1SX5b1AOlWi9r677YJmCPSwwWnQ9O123vzpE4= github.com/containerd/fifo v1.1.0 h1:4I2mbh5stb1u6ycIABlBw9zgtlK8viPI9QkQNRQEEmY= github.com/containerd/fifo v1.1.0/go.mod h1:bmC4NWMbXlt2EZ0Hc7Fx7QzTFxgPID13eH0Qu+MAb2o= @@ -959,8 +959,8 @@ github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfV github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= -github.com/k3s-io/containerd v1.7.22-k3s1 h1:+StsyV/pl4NL5gDA5dzcPi4anuhCI4ONuzCwjBwjrUE= -github.com/k3s-io/containerd v1.7.22-k3s1/go.mod h1:T9perze1nIMl5JzddImIgsCEDaM0i8nAfnm+U48DmJw= +github.com/k3s-io/containerd v1.7.23-k3s1 h1:icB7N6TuuDcr8cqCZMr5TRNPUKoBq45Y9Jjv2qqBpnw= +github.com/k3s-io/containerd v1.7.23-k3s1/go.mod h1:+vNritUoqQTlFedsBErSkyu1yqf3eYikE3oGbEICi+g= github.com/k3s-io/cri-dockerd v0.3.15-k3s1.31-3 h1:TH3zSbIM9zSZMeWKcWjQqeja3FsmJYwLUHglD7nuUEc= github.com/k3s-io/cri-dockerd v0.3.15-k3s1.31-3/go.mod h1:ny6wyM7fqfew5FABQ+MtKaU07rhsHhlXr/jtFsA2m8Y= github.com/k3s-io/cri-tools v1.31.0-k3s2 h1:nekOdJe5Hecm+C5eswg688uXTI0enUZOJYadmyU9pYw=