From 3c7d2e0351d22f2cb6f6cb69c65b388f6c0b6c27 Mon Sep 17 00:00:00 2001 From: Predrag Rogic Date: Tue, 9 Feb 2021 19:51:26 +0000 Subject: [PATCH 1/6] fix WaitForPod by waiting for component Ready instead of pod Running status --- .../bootstrapper/bsutil/kverify/kverify.go | 9 ++ .../bootstrapper/bsutil/kverify/pod_ready.go | 139 ++++++++++++++++++ .../bsutil/kverify/system_pods.go | 37 ++--- pkg/minikube/bootstrapper/kubeadm/kubeadm.go | 8 +- test/integration/functional_test.go | 18 ++- 5 files changed, 175 insertions(+), 36 deletions(-) create mode 100644 pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go b/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go index e12f2d707420..98db0a125a7c 100644 --- a/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go +++ b/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go @@ -60,6 +60,15 @@ var ( "kube-proxy", "kube-scheduler", } + // SystemPodsList is a list of essential pods for running kurnetes to wait for them to be Ready + SystemPodsList = []string{ + "kube-dns", // coredns + "etcd", + "kube-apiserver", + "kube-controller-manager", + "kube-proxy", + "kube-scheduler", + } ) // ShouldWait will return true if the config says need to wait diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go b/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go new file mode 100644 index 000000000000..2cade7640909 --- /dev/null +++ b/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go @@ -0,0 +1,139 @@ +/* +Copyright 2021 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package kverify verifies a running Kubernetes cluster is healthy +package kverify + +import ( + "fmt" + "strings" + "time" + + "github.com/pkg/errors" + core "k8s.io/api/core/v1" + meta "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" + "k8s.io/klog/v2" + kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants" +) + +// WaitForPodReadyByLabel waits for pod with label ([key:]val) in a namespace to be in Ready condition. +// If namespace is not provided, it defaults to "kube-system". +// If label key is not provided, it will try with "component" and "k8s-app". +func WaitForPodReadyByLabel(cs *kubernetes.Clientset, label, namespace string, timeout time.Duration) error { + klog.Infof("waiting %v for pod with %q label in %q namespace to be Ready ...", timeout, label, namespace) + start := time.Now() + defer func() { + klog.Infof("duration metric: took %v to run WaitForPodReadyByLabel for pod with %q label in %q namespace ...", time.Since(start), label, namespace) + }() + + if namespace == "" { + namespace = "kube-system" + } + + lkey := "" + lval := "" + l := strings.Split(label, ":") + switch len(l) { + case 1: // treat as no label key provided, just val + lval = strings.TrimSpace(l[0]) + case 2: + lkey = strings.TrimSpace(l[0]) + lval = strings.TrimSpace(l[1]) + default: + return fmt.Errorf("pod label %q is malformed", label) + } + + checkReady := func() (bool, error) { + if time.Since(start) > timeout { + return false, fmt.Errorf("wait for pod with %q label in %q namespace to be Ready timed out", label, namespace) + } + + pods, err := cs.CoreV1().Pods(namespace).List(meta.ListOptions{}) + if err != nil { + klog.Infof("error listing pods in %q namespace, will retry: %v", namespace, err) + return false, nil + } + for _, pod := range pods.Items { + for k, v := range pod.ObjectMeta.Labels { + if ((lkey == "" && (k == "component" || k == "k8s-app")) || lkey == k) && v == lval { + return checkPodStatus(&pod) + } + } + } + klog.Infof("pod with %q label in %q namespace was not found, will retry", label, namespace) + return false, nil + } + + if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil { + return errors.Wrapf(err, "wait pod Ready") + } + + return nil +} + +// WaitForPodReadyByName waits for pod with name in a namespace to be in Ready condition. +// If namespace is not provided, it defaults to "kube-system". +func WaitForPodReadyByName(cs *kubernetes.Clientset, name, namespace string, timeout time.Duration) error { + klog.Infof("waiting %v for pod %q in %q namespace to be Ready ...", timeout, name, namespace) + start := time.Now() + defer func() { + klog.Infof("duration metric: took %v to run WaitForPodReadyByName for pod %q in %q namespace ...", time.Since(start), name, namespace) + }() + + if namespace == "" { + namespace = "kube-system" + } + + checkReady := func() (bool, error) { + if time.Since(start) > timeout { + return false, fmt.Errorf("wait for pod %q in %q namespace to be Ready timed out", name, namespace) + } + + pod, err := cs.CoreV1().Pods(namespace).Get(name, meta.GetOptions{}) + if err != nil { + klog.Infof("error getting pod %q in %q namespace, will retry: %v", name, namespace, err) + return false, nil + } + return checkPodStatus(pod) + } + + if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil { + return errors.Wrapf(err, "wait pod Ready") + } + + return nil +} + +// checkPodStatus returns if pod is Ready and any error occurred. +func checkPodStatus(pod *core.Pod) (bool, error) { + if pod.Status.Phase != core.PodRunning { + klog.Infof("pod %q in %q namespace is not Running, will retry: %+v", pod.Name, pod.Namespace, pod.Status) + return false, nil + } + for _, c := range pod.Status.Conditions { + if c.Type == core.PodReady { + if c.Status != core.ConditionTrue { + klog.Infof("pod %q in %q namespace is not Ready, will retry: %+v", pod.Name, pod.Namespace, c) + return false, nil + } + klog.Infof("pod %q in %q namespace is Ready ...", pod.Name, pod.Namespace) + return true, nil + } + } + return false, fmt.Errorf("pod %q in %q namespace does not have %q status: %+v", pod.Name, pod.Namespace, core.PodReady, pod.Status) +} diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/system_pods.go b/pkg/minikube/bootstrapper/bsutil/kverify/system_pods.go index 740586b80308..56cbabc02c17 100644 --- a/pkg/minikube/bootstrapper/bsutil/kverify/system_pods.go +++ b/pkg/minikube/bootstrapper/bsutil/kverify/system_pods.go @@ -36,40 +36,25 @@ import ( "k8s.io/minikube/pkg/util/retry" ) -// WaitForSystemPods verifies essential pods for running kurnetes is running +// WaitForSystemPods verifies essential pods for running kurnetes are Ready func WaitForSystemPods(r cruntime.Manager, bs bootstrapper.Bootstrapper, cfg config.ClusterConfig, cr command.Runner, client *kubernetes.Clientset, start time.Time, timeout time.Duration) error { - klog.Info("waiting for kube-system pods to appear ...") + klog.Info("waiting for kube-system pods to be Ready ...") pStart := time.Now() + defer func() { + klog.Infof("duration metric: took %s for waiting for kube-system pods to be Ready ...", time.Since(pStart)) + }() - podList := func() error { - if time.Since(start) > minLogCheckTime { - announceProblems(r, bs, cfg, cr) - time.Sleep(kconst.APICallRetryInterval * 5) - } + if time.Since(start) > minLogCheckTime { + announceProblems(r, bs, cfg, cr) + time.Sleep(kconst.APICallRetryInterval * 5) + } - // Wait for any system pod, as waiting for apiserver may block until etcd - pods, err := client.CoreV1().Pods("kube-system").List(meta.ListOptions{}) - if err != nil { - klog.Warningf("pod list returned error: %v", err) + for _, label := range SystemPodsList { + if err := WaitForPodReadyByLabel(client, label, "kube-system", timeout); err != nil { return err } - - klog.Infof("%d kube-system pods found", len(pods.Items)) - for _, pod := range pods.Items { - klog.Infof(podStatusMsg(pod)) - } - - if len(pods.Items) < 2 { - return fmt.Errorf("only %d pod(s) have shown up", len(pods.Items)) - } - - return nil } - if err := retry.Local(podList, timeout); err != nil { - return fmt.Errorf("apiserver never returned a pod list") - } - klog.Infof("duration metric: took %s to wait for pod list to return data ...", time.Since(pStart)) return nil } diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go index ad0348aa4030..062d09832b82 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go @@ -477,12 +477,8 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time if n.ControlPlane { if cfg.VerifyComponents[kverify.APIServerWaitKey] { - if err := kverify.WaitForAPIServerProcess(cr, k, cfg, k.c, start, timeout); err != nil { - return errors.Wrap(err, "wait for apiserver proc") - } - - if err := kverify.WaitForHealthyAPIServer(cr, k, cfg, k.c, client, start, hostname, port, timeout); err != nil { - return errors.Wrap(err, "wait for healthy API server") + if err := kverify.WaitForPodReadyByLabel(client, "component: kube-apiserver", "kube-system", timeout); err != nil { + return errors.Wrapf(err, "waiting for API server pod to be Ready") } } diff --git a/test/integration/functional_test.go b/test/integration/functional_test.go index c63c6f94a8a5..893291118493 100644 --- a/test/integration/functional_test.go +++ b/test/integration/functional_test.go @@ -474,12 +474,22 @@ func validateComponentHealth(ctx context.Context, t *testing.T, profile string) for _, i := range cs.Items { for _, l := range i.Labels { - t.Logf("%s phase: %s", l, i.Status.Phase) - _, ok := found[l] - if ok { + if _, ok := found[l]; ok { // skip irrelevant (eg, repeating/redundant '"tier": "control-plane"') labels found[l] = true - if i.Status.Phase != "Running" { + t.Logf("%s phase: %s", l, i.Status.Phase) + if i.Status.Phase != api.PodRunning { t.Errorf("%s is not Running: %+v", l, i.Status) + continue + } + for _, c := range i.Status.Conditions { + if c.Type == api.PodReady { + if c.Status != api.ConditionTrue { + t.Errorf("%s is not Ready: %+v", l, i.Status) + } else { + t.Logf("%s status: %s", l, c.Type) + } + break + } } } } From 93e98dee6852d7aad7dfef8eca425430519ca9ec Mon Sep 17 00:00:00 2001 From: Predrag Rogic Date: Fri, 12 Feb 2021 00:40:42 +0000 Subject: [PATCH 2/6] wait CorePodsList components to be Ready only if --wait=all --- .../bootstrapper/bsutil/kverify/kverify.go | 10 +++-- .../bootstrapper/bsutil/kverify/pod_ready.go | 21 +++++++++++ .../bsutil/kverify/system_pods.go | 37 +++++++++++++------ pkg/minikube/bootstrapper/kubeadm/kubeadm.go | 14 ++++++- 4 files changed, 65 insertions(+), 17 deletions(-) diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go b/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go index 98db0a125a7c..f6abca315f75 100644 --- a/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go +++ b/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go @@ -37,6 +37,8 @@ const ( NodeReadyKey = "node_ready" // KubeletKey is the name used in the flags for waiting for the kubelet status to be ready KubeletKey = "kubelet" + // OperationalKey is the name used for waiting for pods in CorePodsList to be Ready + OperationalKey = "operational" ) // vars related to the --wait flag @@ -44,9 +46,9 @@ var ( // DefaultComponents is map of the the default components to wait for DefaultComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true} // NoWaitComponents is map of componets to wait for if specified 'none' or 'false' - NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunningKey: false, NodeReadyKey: false, KubeletKey: false} + NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunningKey: false, NodeReadyKey: false, KubeletKey: false, OperationalKey: false} // AllComponents is map for waiting for all components. - AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunningKey: true, NodeReadyKey: true, KubeletKey: true} + AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunningKey: true, NodeReadyKey: true, KubeletKey: true, OperationalKey: true} // DefaultWaitList is list of all default components to wait for. only names to be used for start flags. DefaultWaitList = []string{APIServerWaitKey, SystemPodsWaitKey} // AllComponentsList list of all valid components keys to wait for. only names to be used used for start flags. @@ -60,8 +62,8 @@ var ( "kube-proxy", "kube-scheduler", } - // SystemPodsList is a list of essential pods for running kurnetes to wait for them to be Ready - SystemPodsList = []string{ + // CorePodsList is a list of essential pods for running kurnetes to wait for them to be operational ("Ready") + CorePodsList = []string{ "kube-dns", // coredns "etcd", "kube-apiserver", diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go b/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go index 2cade7640909..d9c4c7053971 100644 --- a/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go +++ b/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go @@ -31,6 +31,27 @@ import ( kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants" ) +// WaitOperational calls WaitForPodReadyByLabel for each pod in labels list and returns any errors occurred. +func WaitOperational(cs *kubernetes.Clientset, labels []string, timeout time.Duration) error { + klog.Info("waiting for kube-system core pods %s to be Ready ...", labels) + pStart := time.Now() + defer func() { + klog.Infof("duration metric: took %s for waiting for kube-system core pods to be Ready ...", time.Since(pStart)) + }() + + var errs []string + for _, label := range labels { + if err := WaitForPodReadyByLabel(cs, label, "kube-system", timeout); err != nil { + errs = append(errs, fmt.Sprintf("%q: %q", label, err.Error())) + } + } + if errs != nil { + return fmt.Errorf(strings.Join(errs, ", ")) + } + + return nil +} + // WaitForPodReadyByLabel waits for pod with label ([key:]val) in a namespace to be in Ready condition. // If namespace is not provided, it defaults to "kube-system". // If label key is not provided, it will try with "component" and "k8s-app". diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/system_pods.go b/pkg/minikube/bootstrapper/bsutil/kverify/system_pods.go index 56cbabc02c17..740586b80308 100644 --- a/pkg/minikube/bootstrapper/bsutil/kverify/system_pods.go +++ b/pkg/minikube/bootstrapper/bsutil/kverify/system_pods.go @@ -36,25 +36,40 @@ import ( "k8s.io/minikube/pkg/util/retry" ) -// WaitForSystemPods verifies essential pods for running kurnetes are Ready +// WaitForSystemPods verifies essential pods for running kurnetes is running func WaitForSystemPods(r cruntime.Manager, bs bootstrapper.Bootstrapper, cfg config.ClusterConfig, cr command.Runner, client *kubernetes.Clientset, start time.Time, timeout time.Duration) error { - klog.Info("waiting for kube-system pods to be Ready ...") + klog.Info("waiting for kube-system pods to appear ...") pStart := time.Now() - defer func() { - klog.Infof("duration metric: took %s for waiting for kube-system pods to be Ready ...", time.Since(pStart)) - }() - if time.Since(start) > minLogCheckTime { - announceProblems(r, bs, cfg, cr) - time.Sleep(kconst.APICallRetryInterval * 5) - } + podList := func() error { + if time.Since(start) > minLogCheckTime { + announceProblems(r, bs, cfg, cr) + time.Sleep(kconst.APICallRetryInterval * 5) + } - for _, label := range SystemPodsList { - if err := WaitForPodReadyByLabel(client, label, "kube-system", timeout); err != nil { + // Wait for any system pod, as waiting for apiserver may block until etcd + pods, err := client.CoreV1().Pods("kube-system").List(meta.ListOptions{}) + if err != nil { + klog.Warningf("pod list returned error: %v", err) return err } + + klog.Infof("%d kube-system pods found", len(pods.Items)) + for _, pod := range pods.Items { + klog.Infof(podStatusMsg(pod)) + } + + if len(pods.Items) < 2 { + return fmt.Errorf("only %d pod(s) have shown up", len(pods.Items)) + } + + return nil } + if err := retry.Local(podList, timeout); err != nil { + return fmt.Errorf("apiserver never returned a pod list") + } + klog.Infof("duration metric: took %s to wait for pod list to return data ...", time.Since(pStart)) return nil } diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go index 062d09832b82..6efdd1dbd7ee 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go @@ -477,8 +477,12 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time if n.ControlPlane { if cfg.VerifyComponents[kverify.APIServerWaitKey] { - if err := kverify.WaitForPodReadyByLabel(client, "component: kube-apiserver", "kube-system", timeout); err != nil { - return errors.Wrapf(err, "waiting for API server pod to be Ready") + if err := kverify.WaitForAPIServerProcess(cr, k, cfg, k.c, start, timeout); err != nil { + return errors.Wrap(err, "wait for apiserver proc") + } + + if err := kverify.WaitForHealthyAPIServer(cr, k, cfg, k.c, client, start, hostname, port, timeout); err != nil { + return errors.Wrap(err, "wait for healthy API server") } } @@ -499,6 +503,12 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time return errors.Wrap(err, "waiting for apps_running") } } + + if cfg.VerifyComponents[kverify.OperationalKey] { + if err := kverify.WaitOperational(client, kverify.CorePodsList, timeout); err != nil { + return errors.Wrap(err, "waiting for operational status") + } + } } if cfg.VerifyComponents[kverify.KubeletKey] { if err := kverify.WaitForService(k.c, "kubelet", timeout); err != nil { From ec97f1db4b059406c3b2e66525cc7252183958f2 Mon Sep 17 00:00:00 2001 From: Predrag Rogic Date: Fri, 12 Feb 2021 02:13:06 +0000 Subject: [PATCH 3/6] fix integration.validateExtraConfig test adding --wait=all --- test/integration/functional_test.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/test/integration/functional_test.go b/test/integration/functional_test.go index 893291118493..0f90a5cbc75f 100644 --- a/test/integration/functional_test.go +++ b/test/integration/functional_test.go @@ -256,7 +256,6 @@ func validateDockerEnv(ctx context.Context, t *testing.T, profile string) { if !strings.Contains(rr.Output(), expectedImgInside) { t.Fatalf("expected 'docker images' to have %q inside minikube. but the output is: *%s*", expectedImgInside, rr.Output()) } - } func validateStartWithProxy(ctx context.Context, t *testing.T, profile string) { @@ -269,7 +268,7 @@ func validateStartWithProxy(ctx context.Context, t *testing.T, profile string) { // Use more memory so that we may reliably fit MySQL and nginx // changing api server so later in soft start we verify it didn't change - startArgs := append([]string{"start", "-p", profile, "--memory=4000", fmt.Sprintf("--apiserver-port=%d", apiPortTest), "--wait=true"}, StartArgs()...) + startArgs := append([]string{"start", "-p", profile, "--memory=4000", fmt.Sprintf("--apiserver-port=%d", apiPortTest), "--wait=all"}, StartArgs()...) c := exec.CommandContext(ctx, Target(), startArgs...) env := os.Environ() env = append(env, fmt.Sprintf("HTTP_PROXY=%s", srv.Addr)) @@ -401,7 +400,6 @@ func validateMinikubeKubectlDirectCall(ctx context.Context, t *testing.T, profil if err != nil { t.Fatalf("failed to run kubectl directly. args %q: %v", rr.Command(), err) } - } func validateExtraConfig(ctx context.Context, t *testing.T, profile string) { @@ -409,7 +407,7 @@ func validateExtraConfig(ctx context.Context, t *testing.T, profile string) { start := time.Now() // The tests before this already created a profile, starting minikube with different --extra-config cmdline option. - startArgs := []string{"start", "-p", profile, "--extra-config=apiserver.enable-admission-plugins=NamespaceAutoProvision"} + startArgs := []string{"start", "-p", profile, "--extra-config=apiserver.enable-admission-plugins=NamespaceAutoProvision", "--wait=all"} c := exec.CommandContext(ctx, Target(), startArgs...) rr, err := Run(t, c) if err != nil { @@ -427,7 +425,6 @@ func validateExtraConfig(ctx context.Context, t *testing.T, profile string) { if !strings.Contains(afterCfg.Config.KubernetesConfig.ExtraOptions.String(), expectedExtraOptions) { t.Errorf("expected ExtraOptions to contain %s but got %s", expectedExtraOptions, afterCfg.Config.KubernetesConfig.ExtraOptions.String()) } - } // imageID returns a docker image id for image `image` and current architecture From 90cd9c3a6069e6a948c70ebbe0e63e32ecc2b0b8 Mon Sep 17 00:00:00 2001 From: Predrag Rogic Date: Tue, 16 Feb 2021 15:50:54 +0000 Subject: [PATCH 4/6] wait kubelet stabilise after restart --- .../bootstrapper/bsutil/kverify/pod_ready.go | 41 ++++++++++++++----- pkg/minikube/bootstrapper/kubeadm/kubeadm.go | 26 ++++++++---- 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go b/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go index d9c4c7053971..29115b66a550 100644 --- a/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go +++ b/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go @@ -79,6 +79,7 @@ func WaitForPodReadyByLabel(cs *kubernetes.Clientset, label, namespace string, t return fmt.Errorf("pod label %q is malformed", label) } + lap := time.Now() checkReady := func() (bool, error) { if time.Since(start) > timeout { return false, fmt.Errorf("wait for pod with %q label in %q namespace to be Ready timed out", label, namespace) @@ -92,7 +93,17 @@ func WaitForPodReadyByLabel(cs *kubernetes.Clientset, label, namespace string, t for _, pod := range pods.Items { for k, v := range pod.ObjectMeta.Labels { if ((lkey == "" && (k == "component" || k == "k8s-app")) || lkey == k) && v == lval { - return checkPodStatus(&pod) + ready, reason := IsPodReady(&pod) + if ready { + klog.Info(reason) + return true, nil + } + // reduce log spam + if time.Since(lap) > (1 * time.Second) { + klog.Info(reason) + lap = time.Now() + } + return false, nil } } } @@ -120,6 +131,7 @@ func WaitForPodReadyByName(cs *kubernetes.Clientset, name, namespace string, tim namespace = "kube-system" } + lap := time.Now() checkReady := func() (bool, error) { if time.Since(start) > timeout { return false, fmt.Errorf("wait for pod %q in %q namespace to be Ready timed out", name, namespace) @@ -130,7 +142,17 @@ func WaitForPodReadyByName(cs *kubernetes.Clientset, name, namespace string, tim klog.Infof("error getting pod %q in %q namespace, will retry: %v", name, namespace, err) return false, nil } - return checkPodStatus(pod) + ready, reason := IsPodReady(pod) + if ready { + klog.Info(reason) + return true, nil + } + // reduce log spam + if time.Since(lap) > (1 * time.Second) { + klog.Info(reason) + lap = time.Now() + } + return false, nil } if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil { @@ -140,21 +162,18 @@ func WaitForPodReadyByName(cs *kubernetes.Clientset, name, namespace string, tim return nil } -// checkPodStatus returns if pod is Ready and any error occurred. -func checkPodStatus(pod *core.Pod) (bool, error) { +// IsPodReady returns if pod is Ready and verbose reason. +func IsPodReady(pod *core.Pod) (ready bool, reason string) { if pod.Status.Phase != core.PodRunning { - klog.Infof("pod %q in %q namespace is not Running, will retry: %+v", pod.Name, pod.Namespace, pod.Status) - return false, nil + return false, fmt.Sprintf("pod %q in %q namespace is not Running: %+v", pod.Name, pod.Namespace, pod.Status) } for _, c := range pod.Status.Conditions { if c.Type == core.PodReady { if c.Status != core.ConditionTrue { - klog.Infof("pod %q in %q namespace is not Ready, will retry: %+v", pod.Name, pod.Namespace, c) - return false, nil + return false, fmt.Sprintf("pod %q in %q namespace is not Ready: %+v", pod.Name, pod.Namespace, c) } - klog.Infof("pod %q in %q namespace is Ready ...", pod.Name, pod.Namespace) - return true, nil + return true, fmt.Sprintf("pod %q in %q namespace is Ready: %+v", pod.Name, pod.Namespace, c) } } - return false, fmt.Errorf("pod %q in %q namespace does not have %q status: %+v", pod.Name, pod.Namespace, core.PodReady, pod.Status) + return false, fmt.Sprintf("pod %q in %q namespace does not have %q status: %+v", pod.Name, pod.Namespace, core.PodReady, pod.Status) } diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go index 6efdd1dbd7ee..dea2a80b8127 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go @@ -470,6 +470,12 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time return nil } + if cfg.VerifyComponents[kverify.OperationalKey] { + if err := kverify.WaitOperational(client, kverify.CorePodsList, timeout); err != nil { + return errors.Wrap(err, "waiting for operational status") + } + } + cr, err := cruntime.New(cruntime.Config{Type: cfg.KubernetesConfig.ContainerRuntime, Runner: k.c}) if err != nil { return errors.Wrapf(err, "create runtme-manager %s", cfg.KubernetesConfig.ContainerRuntime) @@ -503,18 +509,12 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time return errors.Wrap(err, "waiting for apps_running") } } - - if cfg.VerifyComponents[kverify.OperationalKey] { - if err := kverify.WaitOperational(client, kverify.CorePodsList, timeout); err != nil { - return errors.Wrap(err, "waiting for operational status") - } - } } + if cfg.VerifyComponents[kverify.KubeletKey] { if err := kverify.WaitForService(k.c, "kubelet", timeout); err != nil { return errors.Wrap(err, "waiting for kubelet") } - } if cfg.VerifyComponents[kverify.NodeReadyKey] { @@ -664,6 +664,17 @@ func (k *Bootstrapper) restartControlPlane(cfg config.ClusterConfig) error { } } + if cfg.VerifyComponents[kverify.OperationalKey] { + // after kubelet is restarted (with 'kubeadm init phase kubelet-start' above), + // it appears to be immediately Ready as are all kube-system pods + // then (after ~10sec) it realises it has some changes to apply, implying also pods restarts + // so we wait for kubelet to initialise itself... + time.Sleep(10 * time.Second) + if err := kverify.WaitOperational(client, kverify.CorePodsList, kconst.DefaultControlPlaneTimeout); err != nil { + return errors.Wrap(err, "operational status") + } + } + cr, err := cruntime.New(cruntime.Config{Type: cfg.KubernetesConfig.ContainerRuntime, Runner: k.c}) if err != nil { return errors.Wrap(err, "runtime") @@ -704,6 +715,7 @@ func (k *Bootstrapper) restartControlPlane(cfg config.ClusterConfig) error { if err := bsutil.AdjustResourceLimits(k.c); err != nil { klog.Warningf("unable to adjust resource limits: %v", err) } + return nil } From b8052fe33d2d6daa61f501e52d163e31d88063a5 Mon Sep 17 00:00:00 2001 From: Predrag Rogic Date: Tue, 16 Feb 2021 22:27:26 +0000 Subject: [PATCH 5/6] replace sleep with retry.Expo() --- .../bootstrapper/bsutil/kverify/kverify.go | 10 ++--- .../bootstrapper/bsutil/kverify/pod_ready.go | 14 +++---- pkg/minikube/bootstrapper/kubeadm/kubeadm.go | 39 ++++++++++++++----- test/integration/functional_test.go | 1 + 4 files changed, 40 insertions(+), 24 deletions(-) diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go b/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go index f6abca315f75..a9f9ec6638da 100644 --- a/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go +++ b/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go @@ -37,8 +37,8 @@ const ( NodeReadyKey = "node_ready" // KubeletKey is the name used in the flags for waiting for the kubelet status to be ready KubeletKey = "kubelet" - // OperationalKey is the name used for waiting for pods in CorePodsList to be Ready - OperationalKey = "operational" + // ExtraKey is the name used for extra waiting for pods in CorePodsList to be Ready + ExtraKey = "extra" ) // vars related to the --wait flag @@ -46,9 +46,9 @@ var ( // DefaultComponents is map of the the default components to wait for DefaultComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true} // NoWaitComponents is map of componets to wait for if specified 'none' or 'false' - NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunningKey: false, NodeReadyKey: false, KubeletKey: false, OperationalKey: false} + NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunningKey: false, NodeReadyKey: false, KubeletKey: false, ExtraKey: false} // AllComponents is map for waiting for all components. - AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunningKey: true, NodeReadyKey: true, KubeletKey: true, OperationalKey: true} + AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunningKey: true, NodeReadyKey: true, KubeletKey: true, ExtraKey: true} // DefaultWaitList is list of all default components to wait for. only names to be used for start flags. DefaultWaitList = []string{APIServerWaitKey, SystemPodsWaitKey} // AllComponentsList list of all valid components keys to wait for. only names to be used used for start flags. @@ -62,7 +62,7 @@ var ( "kube-proxy", "kube-scheduler", } - // CorePodsList is a list of essential pods for running kurnetes to wait for them to be operational ("Ready") + // CorePodsList is a list of essential pods for running kurnetes to extra wait for them to be Ready CorePodsList = []string{ "kube-dns", // coredns "etcd", diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go b/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go index 29115b66a550..125a2e5a6fcc 100644 --- a/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go +++ b/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go @@ -31,12 +31,12 @@ import ( kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants" ) -// WaitOperational calls WaitForPodReadyByLabel for each pod in labels list and returns any errors occurred. -func WaitOperational(cs *kubernetes.Clientset, labels []string, timeout time.Duration) error { - klog.Info("waiting for kube-system core pods %s to be Ready ...", labels) - pStart := time.Now() +// WaitExtra calls WaitForPodReadyByLabel for each pod in labels list and returns any errors occurred. +func WaitExtra(cs *kubernetes.Clientset, labels []string, timeout time.Duration) error { + klog.Infof("extra waiting for kube-system core pods %s to be Ready ...", labels) + start := time.Now() defer func() { - klog.Infof("duration metric: took %s for waiting for kube-system core pods to be Ready ...", time.Since(pStart)) + klog.Infof("duration metric: took %s for extra waiting for kube-system core pods to be Ready ...", time.Since(start)) }() var errs []string @@ -84,7 +84,6 @@ func WaitForPodReadyByLabel(cs *kubernetes.Clientset, label, namespace string, t if time.Since(start) > timeout { return false, fmt.Errorf("wait for pod with %q label in %q namespace to be Ready timed out", label, namespace) } - pods, err := cs.CoreV1().Pods(namespace).List(meta.ListOptions{}) if err != nil { klog.Infof("error listing pods in %q namespace, will retry: %v", namespace, err) @@ -110,7 +109,6 @@ func WaitForPodReadyByLabel(cs *kubernetes.Clientset, label, namespace string, t klog.Infof("pod with %q label in %q namespace was not found, will retry", label, namespace) return false, nil } - if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil { return errors.Wrapf(err, "wait pod Ready") } @@ -136,7 +134,6 @@ func WaitForPodReadyByName(cs *kubernetes.Clientset, name, namespace string, tim if time.Since(start) > timeout { return false, fmt.Errorf("wait for pod %q in %q namespace to be Ready timed out", name, namespace) } - pod, err := cs.CoreV1().Pods(namespace).Get(name, meta.GetOptions{}) if err != nil { klog.Infof("error getting pod %q in %q namespace, will retry: %v", name, namespace, err) @@ -154,7 +151,6 @@ func WaitForPodReadyByName(cs *kubernetes.Clientset, name, namespace string, tim } return false, nil } - if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil { return errors.Wrapf(err, "wait pod Ready") } diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go index dea2a80b8127..34194b954039 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go @@ -36,6 +36,7 @@ import ( "github.com/docker/machine/libmachine" "github.com/docker/machine/libmachine/state" "github.com/pkg/errors" + meta "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" @@ -470,9 +471,9 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time return nil } - if cfg.VerifyComponents[kverify.OperationalKey] { - if err := kverify.WaitOperational(client, kverify.CorePodsList, timeout); err != nil { - return errors.Wrap(err, "waiting for operational status") + if cfg.VerifyComponents[kverify.ExtraKey] { + if err := kverify.WaitExtra(client, kverify.CorePodsList, timeout); err != nil { + return errors.Wrap(err, "extra waiting") } } @@ -664,14 +665,32 @@ func (k *Bootstrapper) restartControlPlane(cfg config.ClusterConfig) error { } } - if cfg.VerifyComponents[kverify.OperationalKey] { + if cfg.VerifyComponents[kverify.ExtraKey] { // after kubelet is restarted (with 'kubeadm init phase kubelet-start' above), - // it appears to be immediately Ready as are all kube-system pods - // then (after ~10sec) it realises it has some changes to apply, implying also pods restarts - // so we wait for kubelet to initialise itself... - time.Sleep(10 * time.Second) - if err := kverify.WaitOperational(client, kverify.CorePodsList, kconst.DefaultControlPlaneTimeout); err != nil { - return errors.Wrap(err, "operational status") + // it appears as to be immediately Ready as well as all kube-system pods, + // then (after ~10sec) it realises it has some changes to apply, implying also pods restarts, + // and by that time we would exit completely, so we wait until kubelet begins restarting pods + klog.Info("waiting for restarted kubelet to initialise ...") + start := time.Now() + wait := func() error { + pods, err := client.CoreV1().Pods("kube-system").List(meta.ListOptions{}) + if err != nil { + return err + } + for _, pod := range pods.Items { + if pod.Labels["tier"] == "control-plane" { + if ready, _ := kverify.IsPodReady(&pod); !ready { + return nil + } + } + } + return fmt.Errorf("kubelet not initialised") + } + _ = retry.Expo(wait, 250*time.Millisecond, 1*time.Minute) + klog.Infof("kubelet initialised") + klog.Infof("duration metric: took %s waiting for restarted kubelet to initialise ...", time.Since(start)) + if err := kverify.WaitExtra(client, kverify.CorePodsList, kconst.DefaultControlPlaneTimeout); err != nil { + return errors.Wrap(err, "extra") } } diff --git a/test/integration/functional_test.go b/test/integration/functional_test.go index 0f90a5cbc75f..d29a8f791635 100644 --- a/test/integration/functional_test.go +++ b/test/integration/functional_test.go @@ -448,6 +448,7 @@ func imageID(image string) string { } // validateComponentHealth asserts that all Kubernetes components are healthy +// note: it expects all components to be Ready, so it makes sense to run it close after only those tests that include '--wait=all' start flag (ie, with extra wait) func validateComponentHealth(ctx context.Context, t *testing.T, profile string) { defer PostMortemLogs(t, profile) From 8099f7514aeeda3e9e14ce5790c54018a8cb463e Mon Sep 17 00:00:00 2001 From: Predrag Rogic Date: Tue, 16 Feb 2021 22:56:04 +0000 Subject: [PATCH 6/6] cleanup: remove unused code, make internal-only func private --- .../bootstrapper/bsutil/kverify/pod_ready.go | 48 ++----------------- 1 file changed, 3 insertions(+), 45 deletions(-) diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go b/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go index 125a2e5a6fcc..982cee5461dc 100644 --- a/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go +++ b/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go @@ -41,7 +41,7 @@ func WaitExtra(cs *kubernetes.Clientset, labels []string, timeout time.Duration) var errs []string for _, label := range labels { - if err := WaitForPodReadyByLabel(cs, label, "kube-system", timeout); err != nil { + if err := waitForPodReadyByLabel(cs, label, "kube-system", timeout); err != nil { errs = append(errs, fmt.Sprintf("%q: %q", label, err.Error())) } } @@ -52,10 +52,10 @@ func WaitExtra(cs *kubernetes.Clientset, labels []string, timeout time.Duration) return nil } -// WaitForPodReadyByLabel waits for pod with label ([key:]val) in a namespace to be in Ready condition. +// waitForPodReadyByLabel waits for pod with label ([key:]val) in a namespace to be in Ready condition. // If namespace is not provided, it defaults to "kube-system". // If label key is not provided, it will try with "component" and "k8s-app". -func WaitForPodReadyByLabel(cs *kubernetes.Clientset, label, namespace string, timeout time.Duration) error { +func waitForPodReadyByLabel(cs *kubernetes.Clientset, label, namespace string, timeout time.Duration) error { klog.Infof("waiting %v for pod with %q label in %q namespace to be Ready ...", timeout, label, namespace) start := time.Now() defer func() { @@ -116,48 +116,6 @@ func WaitForPodReadyByLabel(cs *kubernetes.Clientset, label, namespace string, t return nil } -// WaitForPodReadyByName waits for pod with name in a namespace to be in Ready condition. -// If namespace is not provided, it defaults to "kube-system". -func WaitForPodReadyByName(cs *kubernetes.Clientset, name, namespace string, timeout time.Duration) error { - klog.Infof("waiting %v for pod %q in %q namespace to be Ready ...", timeout, name, namespace) - start := time.Now() - defer func() { - klog.Infof("duration metric: took %v to run WaitForPodReadyByName for pod %q in %q namespace ...", time.Since(start), name, namespace) - }() - - if namespace == "" { - namespace = "kube-system" - } - - lap := time.Now() - checkReady := func() (bool, error) { - if time.Since(start) > timeout { - return false, fmt.Errorf("wait for pod %q in %q namespace to be Ready timed out", name, namespace) - } - pod, err := cs.CoreV1().Pods(namespace).Get(name, meta.GetOptions{}) - if err != nil { - klog.Infof("error getting pod %q in %q namespace, will retry: %v", name, namespace, err) - return false, nil - } - ready, reason := IsPodReady(pod) - if ready { - klog.Info(reason) - return true, nil - } - // reduce log spam - if time.Since(lap) > (1 * time.Second) { - klog.Info(reason) - lap = time.Now() - } - return false, nil - } - if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil { - return errors.Wrapf(err, "wait pod Ready") - } - - return nil -} - // IsPodReady returns if pod is Ready and verbose reason. func IsPodReady(pod *core.Pod) (ready bool, reason string) { if pod.Status.Phase != core.PodRunning {