Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check node pressure & new option "node_ready" for --wait flag #7752

Merged
merged 18 commits into from
Apr 18, 2020
Merged
4 changes: 3 additions & 1 deletion pkg/addons/addons.go
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,9 @@ func Start(wg *sync.WaitGroup, cc *config.ClusterConfig, toEnable map[string]boo

var awg sync.WaitGroup

out.T(out.AddonEnable, "Enabling addons: {{.addons}}", out.V{"addons": strings.Join(toEnableList, ", ")})
defer func() { // making it show after verifications( not perfect till #7613 is closed)
medyagh marked this conversation as resolved.
Show resolved Hide resolved
out.T(out.AddonEnable, "Enabled addons: {{.addons}}", out.V{"addons": strings.Join(toEnableList, ", ")})
}()
for _, a := range toEnableList {
awg.Add(1)
go func(name string) {
Expand Down
10 changes: 6 additions & 4 deletions pkg/minikube/bootstrapper/bsutil/kverify/kverify.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,21 +32,23 @@ const (
// DefaultSAWaitKey is the name used in the flags for default service account
DefaultSAWaitKey = "default_sa"
// AppsRunning is the name used in the flags for waiting for k8s-apps to be running
AppsRunning = "apps_running"
AppsRunningKey = "apps_running"
// NodeReadyKey is the name used in the flags for waiting for the node status to be ready
NodeReadyKey = "node_ready"
)

// vars related to the --wait flag
var (
// DefaultComponents is map of the the default components to wait for
DefaultComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true}
// NoWaitComponents is map of componets to wait for if specified 'none' or 'false'
NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunning: false}
NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunningKey: false, NodeReadyKey: false}
// AllComponents is map for waiting for all components.
AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunning: true}
AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunningKey: true}
// DefaultWaitList is list of all default components to wait for. only names to be used for start flags.
DefaultWaitList = []string{APIServerWaitKey, SystemPodsWaitKey}
// AllComponentsList list of all valid components keys to wait for. only names to be used used for start flags.
AllComponentsList = []string{APIServerWaitKey, SystemPodsWaitKey, DefaultSAWaitKey, AppsRunning}
AllComponentsList = []string{APIServerWaitKey, SystemPodsWaitKey, DefaultSAWaitKey, AppsRunningKey, NodeReadyKey}
// AppsRunningList running list are valid k8s-app components to wait for them to be running
AppsRunningList = []string{
"kube-dns", // coredns
Expand Down
142 changes: 142 additions & 0 deletions pkg/minikube/bootstrapper/bsutil/kverify/node_conditions.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/*
Copyright 2020 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package kverify verifies a running kubernetes cluster is healthy
package kverify

import (
"fmt"
"time"

"github.com/golang/glog"
"github.com/pkg/errors"
v1 "k8s.io/api/core/v1"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
)

// NodeCondition represents a favorable or unfavorable node condition.
type NodeCondition struct {
Type v1.NodeConditionType
Status v1.ConditionStatus
Reason string
Message string
}

// DiskPressure detects if the condition is disk pressure
func (pc *NodeCondition) DiskPressure() bool {
return pc.Type == v1.NodeDiskPressure && pc.Status == v1.ConditionTrue
}

// MemoryPressure detects if the condition is memory pressure
func (pc *NodeCondition) MemoryPressure() bool {
return pc.Type == v1.NodeMemoryPressure && pc.Status == v1.ConditionTrue
}

// PIDPressure detects if the condition is PID pressure
func (pc *NodeCondition) PIDPressure() bool {
return pc.Type == v1.NodePIDPressure && pc.Status == v1.ConditionTrue
}

// NetworkUnavailable detects if the condition is PID pressure
func (pc *NodeCondition) NetworkUnavailable() bool {
return pc.Type == v1.NodeNetworkUnavailable && pc.Status == v1.ConditionTrue
}

const errTextFormat = "node has unwanted condition %q : Reason %q Message: %q"

// ErrMemoryPressure is thrown when there is node memory pressure condition
type ErrMemoryPressure struct {
NodeCondition
}

func (e *ErrMemoryPressure) Error() string {
return fmt.Sprintf(errTextFormat, e.Type, e.Reason, e.Message)
}

// ErrDiskPressure is thrown when there is node disk pressure condition
type ErrDiskPressure struct {
NodeCondition
}

func (e *ErrDiskPressure) Error() string {
return fmt.Sprintf(errTextFormat, e.Type, e.Reason, e.Message)
}

// ErrPIDPressure is thrown when there is node PID pressure condition
type ErrPIDPressure struct {
NodeCondition
}

func (e *ErrPIDPressure) Error() string {
return fmt.Sprintf(errTextFormat, e.Type, e.Reason, e.Message)
}

// ErrNetworkNotReady is thrown when there is node condition is network not ready
type ErrNetworkNotReady struct {
NodeCondition
}

func (e *ErrNetworkNotReady) Error() string {
return fmt.Sprintf(errTextFormat, e.Type, e.Reason, e.Message)
}

// NodePressure verfies that node is not under disk, memory, pid or network pressure.
func NodePressure(cs *kubernetes.Clientset) error {
glog.Info("verifying NodePressure condition ...")
start := time.Now()
defer func() {
glog.Infof("duration metric: took %s to run NodePressure ...", time.Since(start))
}()

ns, err := cs.CoreV1().Nodes().List(meta.ListOptions{})
if err != nil {
return errors.Wrap(err, "list nodes")
}

for _, n := range ns.Items {
glog.Infof("node storage ephemeral capacity is %s", n.Status.Capacity.StorageEphemeral())
glog.Infof("node cpu capacity is %s", n.Status.Capacity.Cpu().AsDec())
for _, c := range n.Status.Conditions {
pc := NodeCondition{Type: c.Type, Status: c.Status, Reason: c.Reason, Message: c.Message}
if pc.DiskPressure() {
return &ErrDiskPressure{
NodeCondition: pc,
}
}

if pc.MemoryPressure() {
return &ErrMemoryPressure{
NodeCondition: pc,
medyagh marked this conversation as resolved.
Show resolved Hide resolved
}
}

if pc.PIDPressure() {
return &ErrPIDPressure{
NodeCondition: pc,
}
}

if pc.NetworkUnavailable() {
return &ErrNetworkNotReady{
NodeCondition: pc,
}
}

}
}
return nil
}
64 changes: 64 additions & 0 deletions pkg/minikube/bootstrapper/bsutil/kverify/node_ready.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
Copyright 2020 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package kverify verifies a running kubernetes cluster is healthy
package kverify

import (
"fmt"
"time"

"github.com/golang/glog"
"github.com/pkg/errors"
v1 "k8s.io/api/core/v1"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants"
)

// WaitForNodeReady waits till kube client reports node status as "ready"
func WaitForNodeReady(cs *kubernetes.Clientset, timeout time.Duration) error {
glog.Info("waiting for node status to be ready ...")
start := time.Now()
defer func() {
glog.Infof("duration metric: took %s to wait for WaitForNodeReady...", time.Since(start))
}()
checkReady := func() (bool, error) {
if time.Since(start) > timeout {
return false, fmt.Errorf("wait for node to be ready timed out")
}
ns, err := cs.CoreV1().Nodes().List(meta.ListOptions{})
if err != nil {
glog.Infof("error listing nodes will retry: %v", err)
return false, nil
}

for _, n := range ns.Items {
for _, c := range n.Status.Conditions {
if c.Type == v1.NodeReady && c.Status != v1.ConditionTrue {
glog.Infof("node %q has unwanted condition %q : Reason %q Message: %q. will try. ", n.Name, c.Type, c.Reason, c.Message)
return false, nil
}
}
}
return true, nil
}
if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil {
return errors.Wrapf(err, "wait node ready")
}
return nil
}
Loading