Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

wip: add new wait components "node_ready" and "no_pressure" #7611

Closed
wants to merge 52 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
29dbd20
add disk_health wait component
medyagh Apr 11, 2020
27417d4
add emoji options for showing verification
medyagh Apr 11, 2020
1c2f35d
add disk health wait componenet
medyagh Apr 11, 2020
dd05613
handle node disk pressure
medyagh Apr 11, 2020
1684ab1
add memory pressure verification
medyagh Apr 11, 2020
6082bdf
add node pid verification
medyagh Apr 11, 2020
e21f680
add node ready condition
medyagh Apr 11, 2020
1224a78
return error
medyagh Apr 11, 2020
8103534
brush up
medyagh Apr 11, 2020
9ed9cf9
flip the debugging statement
medyagh Apr 11, 2020
2137e71
fix comment
medyagh Apr 11, 2020
7098285
refactor more
medyagh Apr 11, 2020
8aa20de
update docs
medyagh Apr 11, 2020
e604bea
comments
medyagh Apr 15, 2020
86bec59
rename to no_pressure
medyagh Apr 15, 2020
db99da7
rename drver
medyagh Apr 15, 2020
073de7b
lint
medyagh Apr 16, 2020
5bd32bb
add advice func
medyagh Apr 16, 2020
5d868ea
add advice node pressure
medyagh Apr 16, 2020
e5afabf
lint
medyagh Apr 16, 2020
4656a4e
fix typos
medyagh Apr 16, 2020
d9e4876
generate docs
medyagh Apr 16, 2020
bb318a7
simplify flags
medyagh Apr 16, 2020
c294a6c
try to delay printing enable addons
medyagh Apr 16, 2020
9d84424
tune message
medyagh Apr 16, 2020
d3db795
pass name
medyagh Apr 16, 2020
fe7c300
Add Windows integration test for Docker
tstromberg Apr 15, 2020
e8a3be4
Update addons.en.md
tstromberg Apr 16, 2020
6136d79
Mention addon testing
tstromberg Apr 16, 2020
db8023d
Remove hyperv-virtual-switch argument, delete all profiles
tstromberg Apr 15, 2020
5a2304d
enable addons after waiting for cluster up
medyagh Apr 16, 2020
79034e8
wait for default sa for containerd
medyagh Apr 16, 2020
9e8321d
robust creation of cluster role binding
medyagh Apr 16, 2020
5c0bf52
revert unrelated changes
medyagh Apr 16, 2020
4ad968a
lint
medyagh Apr 16, 2020
6585f70
use wait lib
medyagh Apr 16, 2020
196bb93
revert addon change for another PR
medyagh Apr 16, 2020
1dea706
revert unrelated changes
medyagh Apr 16, 2020
21f9a85
add test for second start no reset
medyagh Apr 16, 2020
efad68c
wait true
medyagh Apr 16, 2020
b00e6cd
wait all
medyagh Apr 16, 2020
c37e45f
Add support for specifing pr when running mkcmp
Apr 9, 2020
22aa787
fix lint
Apr 16, 2020
5ceb735
reintroduce unit test
Apr 16, 2020
fc16709
brush up
medyagh Apr 11, 2020
98fd0d9
refactor more
medyagh Apr 11, 2020
509144f
lint
medyagh Apr 16, 2020
c5fe5cd
add advice func
medyagh Apr 16, 2020
aaa4fb6
add advice node pressure
medyagh Apr 16, 2020
26afe7c
lint
medyagh Apr 16, 2020
4f4c169
pass name
medyagh Apr 16, 2020
6fab0f9
merge conflict
medyagh Apr 16, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion cmd/performance/mkcmp/cmd/mkcmp.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ var rootCmd = &cobra.Command{
return validateArgs(args)
},
RunE: func(cmd *cobra.Command, args []string) error {
return perf.CompareMinikubeStart(context.Background(), os.Stdout, args)
binaries, err := retrieveBinaries(args)
if err != nil {
return err
}
return perf.CompareMinikubeStart(context.Background(), os.Stdout, binaries)
},
}

Expand All @@ -46,6 +50,18 @@ func validateArgs(args []string) error {
return nil
}

// retrieveBinaries converts each command-line argument into a *perf.Binary by
// calling perf.NewBinary, preserving argument order. It stops at the first
// argument that fails and returns that error together with a nil slice.
func retrieveBinaries(args []string) ([]*perf.Binary, error) {
	result := make([]*perf.Binary, 0, len(args))
	for i := range args {
		b, err := perf.NewBinary(args[i])
		if err != nil {
			return nil, err
		}
		result = append(result, b)
	}
	return result, nil
}

// Execute runs the mkcmp command
func Execute() {
if err := rootCmd.Execute(); err != nil {
Expand Down
34 changes: 34 additions & 0 deletions hack/jenkins/windows_integration_test_docker.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright 2019 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Download the minikube binary, the e2e test binary and the test data for the
# build identified by $env:MINIKUBE_LOCATION from the minikube-builds bucket.
mkdir -p out
gsutil.cmd -m cp gs://minikube-builds/$env:MINIKUBE_LOCATION/minikube-windows-amd64.exe out/
gsutil.cmd -m cp gs://minikube-builds/$env:MINIKUBE_LOCATION/e2e-windows-amd64.exe out/
gsutil.cmd -m cp -r gs://minikube-builds/$env:MINIKUBE_LOCATION/testdata .

# Delete every existing profile before the test run.
./out/minikube-windows-amd64.exe delete --all

# Run the integration tests against the docker driver.
out/e2e-windows-amd64.exe -minikube-start-args="--driver=docker" -binary=out/minikube-windows-amd64.exe -test.v -test.timeout=65m
# Capture the test exit code immediately, before any later command overwrites $lastexitcode.
$env:result=$lastexitcode
# If the last exit code was 0->success, x>0->error
If($env:result -eq 0){$env:status="success"}
Else {$env:status="failure"}

# $env:SHORT_COMMIT=$env:COMMIT.substring(0, 7)
# to be used later to implement https://github.com/kubernetes/minikube/issues/6593
# Post a commit status for $env:COMMIT to GitHub under the "Docker_Windows"
# context, linking to the log file for this build.
$env:target_url="https://storage.googleapis.com/minikube-builds/logs/$env:MINIKUBE_LOCATION/Docker_Windows.txt"
$json = "{`"state`": `"$env:status`", `"description`": `"Jenkins`", `"target_url`": `"$env:target_url`", `"context`": `"Docker_Windows`"}"
Invoke-WebRequest -Uri "https://api.github.com/repos/kubernetes/minikube/statuses/$env:COMMIT`?access_token=$env:access_token" -Body $json -ContentType "application/json" -Method Post -usebasicparsing

# Exit with the test run's exit code so the calling job sees the test result.
Exit $env:result
4 changes: 2 additions & 2 deletions hack/jenkins/windows_integration_test_hyperv.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ gsutil.cmd -m cp gs://minikube-builds/$env:MINIKUBE_LOCATION/minikube-windows-am
gsutil.cmd -m cp gs://minikube-builds/$env:MINIKUBE_LOCATION/e2e-windows-amd64.exe out/
gsutil.cmd -m cp -r gs://minikube-builds/$env:MINIKUBE_LOCATION/testdata .

./out/minikube-windows-amd64.exe delete
./out/minikube-windows-amd64.exe delete --all

out/e2e-windows-amd64.exe -minikube-start-args="--driver=hyperv --hyperv-virtual-switch=primary-virtual-switch" -binary=out/minikube-windows-amd64.exe -test.v -test.timeout=65m
out/e2e-windows-amd64.exe -minikube-start-args="--driver=hyperv" -binary=out/minikube-windows-amd64.exe -test.v -test.timeout=65m
$env:result=$lastexitcode
# If the last exit code was 0->success, x>0->error
If($env:result -eq 0){$env:status="success"}
Expand Down
2 changes: 1 addition & 1 deletion pkg/addons/addons.go
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ func Start(wg *sync.WaitGroup, cc *config.ClusterConfig, toEnable map[string]boo

var awg sync.WaitGroup

out.T(out.AddonEnable, "Enabling addons: {{.addons}}", out.V{"addons": strings.Join(toEnableList, ", ")})
defer out.T(out.AddonEnable, "Enabling addons: {{.addons}}", out.V{"addons": strings.Join(toEnableList, ", ")})
for _, a := range toEnableList {
awg.Add(1)
go func(name string) {
Expand Down
15 changes: 8 additions & 7 deletions pkg/minikube/bootstrapper/bsutil/kverify/default_sa.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,36 +18,37 @@ limitations under the License.
package kverify

import (
"fmt"
"time"

"github.com/golang/glog"
"github.com/pkg/errors"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"k8s.io/minikube/pkg/util/retry"
kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants"
)

// WaitForDefaultSA waits for the default service account to be created.
func WaitForDefaultSA(cs *kubernetes.Clientset, timeout time.Duration) error {
glog.Info("waiting for default service account to be created ...")
start := time.Now()
saReady := func() error {
saReady := func() (bool, error) {
// equivalent to manual check of 'kubectl --context profile get serviceaccount default'
sas, err := cs.CoreV1().ServiceAccounts("default").List(meta.ListOptions{})
if err != nil {
glog.Infof("temproary error waiting for default SA: %v", err)
return err
return false, nil
}
for _, sa := range sas.Items {
if sa.Name == "default" {
glog.Infof("found service account: %q", sa.Name)
return nil
return true, nil
}
}
return fmt.Errorf("couldn't find default service account")
return false, nil
}
if err := retry.Expo(saReady, 500*time.Millisecond, timeout); err != nil {

if err := wait.PollImmediate(kconst.APICallRetryInterval, timeout, saReady); err != nil {
return errors.Wrapf(err, "waited %s for SA", time.Since(start))
}

Expand Down
15 changes: 10 additions & 5 deletions pkg/minikube/bootstrapper/bsutil/kverify/kverify.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,27 @@ const (
SystemPodsWaitKey = "system_pods"
// DefaultSAWaitKey is the name used in the flags for default service account
DefaultSAWaitKey = "default_sa"
// AppsRunning is the name used in the flags for waiting for k8s-apps to be running
AppsRunning = "apps_running"
// AppsRunningKey is the name used in the flags for waiting for k8s-apps to be running
AppsRunningKey = "apps_running"
// NodePressureKey is the name used in the flags detecting node conditions such as
// disk, memory and PID pressure or network not ready.
NodePressureKey = "no_pressure"
// NodeReadyKey is the name used in the flags for waiting for the node status to be ready
NodeReadyKey = "node_ready"
)

// vars related to the --wait flag
var (
// DefaultComponents is map of the default components to wait for
DefaultComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true}
// NoComponents is map of components to wait for if specified 'none' or 'false'
NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunning: false}
NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunningKey: false, NodePressureKey: false, NodeReadyKey: false}
// AllComponents is map for waiting for all components.
AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunning: true}
AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunningKey: true, NodePressureKey: true, NodeReadyKey: true}
// DefaultWaitList is list of all default components to wait for. only names to be used for start flags.
DefaultWaitList = []string{APIServerWaitKey, SystemPodsWaitKey}
// AllComponentsList list of all valid components keys to wait for. only names to be used for start flags.
AllComponentsList = []string{APIServerWaitKey, SystemPodsWaitKey, DefaultSAWaitKey, AppsRunning}
AllComponentsList = []string{APIServerWaitKey, SystemPodsWaitKey, DefaultSAWaitKey, AppsRunningKey, NodePressureKey, NodeReadyKey}
// AppsRunningList running list are valid k8s-app components to wait for them to be running
AppsRunningList = []string{
"kube-dns", // coredns
Expand Down
145 changes: 145 additions & 0 deletions pkg/minikube/bootstrapper/bsutil/kverify/node_health.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/*
Copyright 2020 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package kverify verifies a running kubernetes cluster is healthy
package kverify

import (
"fmt"
"runtime"
"time"

"github.com/golang/glog"
"github.com/pkg/errors"
v1 "k8s.io/api/core/v1"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants"
"k8s.io/minikube/pkg/minikube/driver"
"k8s.io/minikube/pkg/minikube/out"
)

// NodeConditions verfies that node is not under disk, memory, pid or network pressure.
medyagh marked this conversation as resolved.
Show resolved Hide resolved
func NodeConditions(cs *kubernetes.Clientset, drver string) error {
medyagh marked this conversation as resolved.
Show resolved Hide resolved
glog.Info("verifying NodePressure condition ...")
start := time.Now()
defer func() {
glog.Infof("duration metric: took %s to wait for NodePressure...", time.Since(start))
}()

ns, err := cs.CoreV1().Nodes().List(meta.ListOptions{})
if err != nil {
return errors.Wrap(err, "list nodes")
}

for _, n := range ns.Items {
glog.Infof("node storage ephemeral capacity is %s", n.Status.Capacity.StorageEphemeral())
glog.Infof("node cpu capacity is %s", n.Status.Capacity.Cpu().AsDec())
for _, c := range n.Status.Conditions {
if c.Type == v1.NodeDiskPressure && c.Status != v1.ConditionTrue {
out.Ln("")
medyagh marked this conversation as resolved.
Show resolved Hide resolved
out.ErrT(out.FailureType, "node {{.name}} has unwanted condition {{.condition_type}} : Reason {{.reason}} Message: {{.message}}", out.V{"name": n.Name, "condition_type": c.Type, "reason": c.Reason, "message": c.Message})
out.WarningT("The node on {{.name}} has ran out of disk space. please consider allocating more disk using or pruning un-used images", out.V{"name": n.Name})
if driver.IsKIC(drver) && runtime.GOOS != "linux" {
out.T(out.Stopped, "Please increase Docker Desktop's disk image size.")
if runtime.GOOS == "darwin" {
out.T(out.Documentation, "Documentation: {{.url}}", out.V{"url": "https://docs.docker.com/docker-for-mac/space/"})
}
if runtime.GOOS == "windows" {
out.T(out.Documentation, "Documentation: {{.url}}", out.V{"url": "https://docs.docker.com/docker-for-windows/"})
}
} else { // VM-drivers
out.T(out.Stopped, "You can specify a larger disk for your cluster using `minikube start --disk` ")
medyagh marked this conversation as resolved.
Show resolved Hide resolved
}
out.Ln("") // if there is error message, lets make an empty space for better visilibtly
return fmt.Errorf("node %q has unwanted condition %q : Reason %q Message: %q ", n.Name, c.Type, c.Reason, c.Message)
}

if c.Type == v1.NodeMemoryPressure && c.Status == v1.ConditionTrue {
out.Ln("")
out.ErrT(out.FailureType, "node {{.name}} has unwanted condition {{.condition_type}} : Reason {{.reason}} Message: {{.message}}", out.V{"name": n.Name, "condition_type": c.Type, "reason": c.Reason, "message": c.Message})
out.WarningT("The node on {{.name}} has ran of memory.", out.V{"name": n.Name})
if driver.IsKIC(drver) && runtime.GOOS != "linux" {
out.T(out.Stopped, "Please increase Docker Desktop's memory.")
if runtime.GOOS == "darwin" {
out.T(out.Documentation, "Documentation: {{.url}}", out.V{"url": "https://docs.docker.com/docker-for-mac/space/"})
}
if runtime.GOOS == "windows" {
out.T(out.Documentation, "Documentation: {{.url}}", out.V{"url": "https://docs.docker.com/docker-for-windows/"})
}
} else {
out.T(out.Stopped, "You can specify a larger memory size for your cluster using `minikube start --memory` ")
medyagh marked this conversation as resolved.
Show resolved Hide resolved
}
out.Ln("") // if there is error message, lets make an empty space for better visilibtly
return fmt.Errorf("node %q has unwanted condition %q : Reason %q Message: %q ", n.Name, c.Type, c.Reason, c.Message)
}

if c.Type == v1.NodePIDPressure && c.Status == v1.ConditionTrue {
out.Ln("")
out.ErrT(out.FailureType, "node {{.name}} has unwanted condition {{.condition_type}} : Reason {{.reason}} Message: {{.message}}", out.V{"name": n.Name, "condition_type": c.Type, "reason": c.Reason, "message": c.Message})
out.WarningT("The node has ran out of available PIDs.", out.V{"name": n.Name})
out.Ln("")
return fmt.Errorf("node %q has unwanted condition %q : Reason %q Message: %q ", n.Name, c.Type, c.Reason, c.Message)
}

if c.Type == v1.NodeNetworkUnavailable && c.Status == v1.ConditionTrue {
out.Ln("")
medyagh marked this conversation as resolved.
Show resolved Hide resolved
out.ErrT(out.FailureType, "node {{.name}} has unwanted condition {{.condition_type}} : Reason {{.reason}} Message: {{.message}}", out.V{"name": n.Name, "condition_type": c.Type, "reason": c.Reason, "message": c.Message})
out.WarningT("The node networking is not configured correctly.", out.V{"name": n.Name})
out.Ln("")
return fmt.Errorf("node %q has unwanted condition %q : Reason %q Message: %q ", n.Name, c.Type, c.Reason, c.Message)
}
}
}

return nil
}

// WaitForNodeReady waits for a node to be ready
func WaitForNodeReady(cs *kubernetes.Clientset, timeout time.Duration) error {
glog.Info("waiting for node to be ready ...")
start := time.Now()
defer func() {
glog.Infof("duration metric: took %s to wait for WaitForNodeReady...", time.Since(start))
}()
checkReady := func() (bool, error) {
if time.Since(start) > timeout {
return false, fmt.Errorf("wait for node to be ready timed out")
}
ns, err := cs.CoreV1().Nodes().List(meta.ListOptions{})
if err != nil {
glog.Infof("error listing nodes will retry: %v", err)
return false, nil
}

for _, n := range ns.Items {
for _, c := range n.Status.Conditions {
if c.Type == v1.NodeReady && c.Status != v1.ConditionTrue {
glog.Infof("node %q has unwanted condition %q : Reason %q Message: %q. will try. ", n.Name, c.Type, c.Reason, c.Message)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove space before colon, and trailing space.

return false, nil
}
}
}
return true, nil
}

if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil {
return errors.Wrapf(err, "wait node ready")
}

return nil
}
Loading