This repository has been archived by the owner on Oct 24, 2023. It is now read-only.

test: incorporate kured + auto mode into kamino vmss-prototype tests #4221

Merged (1 commit) on Feb 3, 2021
31 changes: 26 additions & 5 deletions test/e2e/kubernetes/kubernetes_test.go
@@ -2798,6 +2798,11 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
It("should be able to install vmss node prototype", func() {
if cfg.RunVMSSNodePrototype {
if eng.ExpandedDefinition.Properties.HasVMSSAgentPool() {
By("Installing kured with node annotations configuration")
cmd := exec.Command("k", "apply", "-f", filepath.Join(WorkloadDir, "kured-annotations.yaml"))
util.PrintCommand(cmd)
_, err := cmd.CombinedOutput()
Expect(err).NotTo(HaveOccurred())
nodes, err := node.GetReadyWithRetry(1*time.Second, cfg.Timeout)
Expect(err).NotTo(HaveOccurred())
var numAgentNodes int
@@ -2824,6 +2829,24 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
elapsed := time.Since(start)
log.Printf("Took %s to schedule %d Pods with large containers via DaemonSet\n", elapsed, numLargeContainerPods)
}
By("Marking all nodes as needing reboots")
for _, n := range nodes {
if n.IsLinux() && !controlPlaneNodeRegexp.MatchString(n.Metadata.Name) {
err = sshConn.ExecuteRemoteWithRetry(n.Metadata.Name, fmt.Sprintf("\"sudo touch /var/run/reboot-required\""), false, 30*time.Second, cfg.Timeout)
Expect(err).NotTo(HaveOccurred())
}
}
By("Waiting for one node to be marked as SchedulingDisabled by kured")
ready := node.WaitOnReadyMax(len(nodes)-1, 5*time.Second, cfg.Timeout)
Expect(ready).To(BeTrue())
By("Waiting for nodes to be be rebooted and annotated correctly")
_, err = node.WaitForNodesWithAnnotation(numAgentNodes, "weave.works/kured-most-recent-reboot-needed", "", 5*time.Second, cfg.Timeout)
Expect(err).NotTo(HaveOccurred())
_, err = node.WaitForNodesWithAnnotation(0, "weave.works/kured-reboot-in-progress", "", 1*time.Minute, cfg.Timeout)
Expect(err).NotTo(HaveOccurred())
By("Waiting for all nodes to be Ready again")
ready = node.WaitOnReady(len(nodes), 30*time.Second, cfg.Timeout)
Expect(ready).To(Equal(true))
By("Choosing a target VMSS node to use as the prototype")
var targetNode string
for _, n := range nodes {
@@ -2878,7 +2901,7 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
timeToLargeContainerDaemonsetRunningBaseline = time.Since(start)
log.Printf("Took %s for large-container-daemonset pod to reach Running state on new node\n", timeToLargeContainerDaemonsetRunningBaseline)
}
cmd := exec.Command("helm", "status", "vmss-prototype")
cmd = exec.Command("helm", "status", "vmss-prototype")
out, err := cmd.CombinedOutput()
if err == nil {
By("Found pre-existing 'vmss-prototype' helm release, deleting it...")
@@ -2893,7 +2916,7 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
} else {
commandArgsSlice = append(commandArgsSlice, []string{"vmss-prototype", cfg.KaminoVMSSPrototypeLocalChartPath}...)
}
commandArgsSlice = append(commandArgsSlice, []string{"--namespace", "default", "--set", "kamino.scheduleOnControlPlane=true", "--set", "kamino.newUpdatedNodes=2", "--set", "kamino.logLevel=DEBUG"}...)
commandArgsSlice = append(commandArgsSlice, []string{"--namespace", "default", "--set", "kamino.scheduleOnControlPlane=true", "--set", "kamino.newUpdatedNodes=2", "--set", "kamino.logLevel=DEBUG", "--set", fmt.Sprintf("kamino.targetVMSS=%s", vmssName), "--set", "kamino.auto.lastPatchAnnotation=weave.works/kured-most-recent-reboot-needed", "--set", "kamino.auto.pendingRebootAnnotation=weave.works/kured-reboot-in-progress", "--set", "kamino.auto.minimumReadyTime=1s"}...)
if cfg.KaminoVMSSPrototypeImageRegistry != "" {
commandArgsSlice = append(commandArgsSlice, []string{"--set", fmt.Sprintf("kamino.container.imageRegistry=%s", cfg.KaminoVMSSPrototypeImageRegistry)}...)
}
@@ -2904,9 +2927,7 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
commandArgsSlice = append(commandArgsSlice, []string{"--set", fmt.Sprintf("kamino.container.imageTag=%s", cfg.KaminoVMSSPrototypeImageTag), "--set", "kamino.container.pullByHash=false"}...)
}
if cfg.KaminoVMSSPrototypeDryRun {
commandArgsSlice = append(commandArgsSlice, []string{"--set", fmt.Sprintf("kamino.targetVMSS=%s", vmssName), "--set", "kamino.auto.lastPatchAnnotation=weave.works/kured-most-recent-reboot-needed", "--set", "kamino.auto.pendingRebootAnnotation=weave.works/kured-reboot-in-progress", "--set", "kamino.auto.dryRun=true"}...)
} else {
commandArgsSlice = append(commandArgsSlice, []string{"--set", fmt.Sprintf("kamino.targetNode=%s", targetNode)}...)
commandArgsSlice = append(commandArgsSlice, []string{"--set", "kamino.auto.dryRun=true"}...)
}
cmd = exec.Command("helm", commandArgsSlice...)
util.PrintCommand(cmd)
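For orientation, with cfg.KaminoVMSSPrototypeDryRun enabled the arguments assembled above collapse into a single helm invocation. The sketch below is illustrative only: the leading helm verbs are not visible in this hunk and are assumed, the chart path and VMSS name are placeholders, and only the --set flags mirror the diff.

package main

import (
	"fmt"
	"os/exec"
)

func main() {
	// Assumed verbs; the real slice is started earlier in the test, outside this hunk.
	args := []string{
		"upgrade", "--install",
		"vmss-prototype", "./kamino/vmss-prototype", // hypothetical local chart path
		"--namespace", "default",
		"--set", "kamino.scheduleOnControlPlane=true",
		"--set", "kamino.newUpdatedNodes=2",
		"--set", "kamino.logLevel=DEBUG",
		"--set", "kamino.targetVMSS=k8s-agentpool1-00000000-vmss", // hypothetical VMSS name
		"--set", "kamino.auto.lastPatchAnnotation=weave.works/kured-most-recent-reboot-needed",
		"--set", "kamino.auto.pendingRebootAnnotation=weave.works/kured-reboot-in-progress",
		"--set", "kamino.auto.minimumReadyTime=1s",
		"--set", "kamino.auto.dryRun=true",
	}
	cmd := exec.Command("helm", args...)
	fmt.Println(cmd.String()) // print the assembled command rather than running it
}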
64 changes: 54 additions & 10 deletions test/e2e/kubernetes/node/node.go
@@ -384,17 +384,12 @@ func AreMaxNodesReady(nodeCount int) bool {
var ready int
if list != nil {
for _, node := range list.Nodes {
- nodeReady := node.IsReady()
- if !nodeReady {
- return false
+ if node.IsReady() {
+ ready++
}
- ready++
}
}
- if ready <= nodeCount {
- return true
- }
- return false
+ return ready <= nodeCount
}

// WaitOnReady will block until all nodes are in ready state
@@ -484,6 +479,40 @@ func WaitOnReadyMax(nodeCount int, sleep, timeout time.Duration) bool {
}
}

// WaitForNodesWithAnnotation will wait until the desired number of nodes have a particular annotation
func WaitForNodesWithAnnotation(nodeCount int, key, val string, sleep, timeout time.Duration) ([]Node, error) {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
ch := make(chan GetNodesResult)
var mostRecentWaitForNodesWithAnnotationError error
var nodes []Node
go func() {
for {
select {
case <-ctx.Done():
return
default:
ch <- GetByAnnotationsAsync(key, val)
time.Sleep(sleep)
}
}
}()
for {
select {
case result := <-ch:
mostRecentWaitForNodesWithAnnotationError = result.Err
nodes = result.Nodes
if mostRecentWaitForNodesWithAnnotationError == nil {
if len(nodes) == nodeCount {
return nodes, nil
}
}
case <-ctx.Done():
return nil, errors.Errorf("WaitForNodesWithAnnotation timed out: %s\n", mostRecentWaitForNodesWithAnnotationError)
}
}
}

// Get returns the current nodes for a given kubeconfig
func Get() (*List, error) {
cmd := exec.Command("k", "get", "nodes", "-o", "json")
@@ -724,6 +753,15 @@ func GetByLabel(label string) ([]Node, error) {
return nodes, nil
}

// GetByAnnotationsAsync wraps GetByAnnotations with a struct response for goroutine + channel usage
func GetByAnnotationsAsync(key, value string) GetNodesResult {
nodes, err := GetByAnnotations(key, value)
return GetNodesResult{
Nodes: nodes,
Err: err,
}
}

// GetByAnnotations will return a []Node of all nodes that have a matching annotation
func GetByAnnotations(key, value string) ([]Node, error) {
list, err := Get()
@@ -733,8 +771,14 @@ func GetByAnnotations(key, value string) ([]Node, error) {

nodes := make([]Node, 0)
for _, n := range list.Nodes {
- if n.Metadata.Annotations[key] == value {
- nodes = append(nodes, n)
+ if value != "" {
+ if n.Metadata.Annotations[key] == value {
+ nodes = append(nodes, n)
+ }
+ } else {
+ if _, ok := n.Metadata.Annotations[key]; ok {
+ nodes = append(nodes, n)
+ }
}
}
return nodes, nil
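The new WaitForNodesWithAnnotation helper above uses the suite's usual poll-over-channel pattern: a goroutine polls on an interval and pushes each GetNodesResult onto a channel while the caller selects on that channel and a context deadline, remembering the most recent error for the timeout message. Below is a standalone sketch of the same pattern with illustrative names that are not from the repo; the one deliberate tweak is that the send also selects on ctx.Done so the polling goroutine cannot block after the caller returns.

package main

import (
	"context"
	"fmt"
	"time"
)

// pollResult stands in for node.GetNodesResult in this sketch.
type pollResult struct {
	count int
	err   error
}

// waitForCount blocks until poll reports the desired count or the timeout elapses.
func waitForCount(want int, poll func() pollResult, sleep, timeout time.Duration) (int, error) {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	ch := make(chan pollResult)
	go func() {
		for {
			r := poll()
			select {
			case <-ctx.Done():
				return
			case ch <- r: // also honors cancellation, so the sender never leaks
			}
			time.Sleep(sleep)
		}
	}()
	var lastErr error
	for {
		select {
		case r := <-ch:
			lastErr = r.err
			if r.err == nil && r.count == want {
				return r.count, nil
			}
		case <-ctx.Done():
			return 0, fmt.Errorf("timed out waiting for count %d, most recent error: %v", want, lastErr)
		}
	}
}

func main() {
	calls := 0
	got, err := waitForCount(3, func() pollResult {
		calls++ // pretend one more node picks up the annotation on each poll
		return pollResult{count: calls}
	}, 10*time.Millisecond, time.Second)
	fmt.Println(got, err)
}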
129 changes: 129 additions & 0 deletions test/e2e/kubernetes/workloads/kured-annotations.yaml
@@ -0,0 +1,129 @@
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: kured
rules:
# Allow kured to read spec.unschedulable
# Allow kubectl to drain/uncordon
#
# NB: These permissions are tightly coupled to the bundled version of kubectl; the ones below
# match https://github.com/kubernetes/kubernetes/blob/v1.19.4/staging/src/k8s.io/kubectl/pkg/cmd/drain/drain.go
#
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "patch"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["list","delete","get"]
- apiGroups: ["apps"]
resources: ["daemonsets"]
verbs: ["get"]
- apiGroups: [""]
resources: ["pods/eviction"]
verbs: ["create"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: kured
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: kured
subjects:
- kind: ServiceAccount
name: kured
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
namespace: kube-system
name: kured
rules:
# Allow kured to lock/unlock itself
- apiGroups: ["apps"]
resources: ["daemonsets"]
resourceNames: ["kured"]
verbs: ["update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
namespace: kube-system
name: kured
subjects:
- kind: ServiceAccount
namespace: kube-system
name: kured
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: kured
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: kured
namespace: kube-system
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: kured # Must match `--ds-name`
namespace: kube-system # Must match `--ds-namespace`
spec:
selector:
matchLabels:
name: kured
updateStrategy:
type: RollingUpdate
template:
metadata:
labels:
name: kured
spec:
serviceAccountName: kured
tolerations:
- key: node-role.kubernetes.io/master
effect: NoSchedule
hostPID: true # Facilitate entering the host mount namespace via init
restartPolicy: Always
containers:
- name: kured
image: docker.io/jackfrancis/kured:node-annotations-chart-e9de81b
# If you find yourself here wondering why there is no
# :latest tag on Docker Hub, see the FAQ in the README
imagePullPolicy: IfNotPresent
securityContext:
privileged: true # Give permission to nsenter /proc/1/ns/mnt
env:
# Pass in the name of the node on which this pod is scheduled
# for use with drain/uncordon operations and lock acquisition
- name: KURED_NODE_ID
valueFrom:
fieldRef:
fieldPath: spec.nodeName
command:
- /usr/bin/kured
# - --alert-filter-regexp=^RebootRequired$
# - --blocking-pod-selector=runtime=long,cost=expensive
# - --blocking-pod-selector=name=temperamental
# - --blocking-pod-selector=...
# - --ds-name=kured
# - --ds-namespace=kube-system
# - --end-time=23:59:59
# - --lock-annotation=weave.works/kured-node-lock
- --period=1m
# - --prometheus-url=http://prometheus.monitoring.svc.cluster.local
# - --reboot-days=sun,mon,tue,wed,thu,fri,sat
# - --reboot-sentinel=/var/run/reboot-required
# - --slack-hook-url=https://hooks.slack.com/...
# - --slack-username=prod
# - --slack-channel=alerting
# - --message-template-drain=Draining node %s
# - --message-template-reboot=Rebooting node %s
# - --start-time=0:00
# - --time-zone=UTC
- --annotate-nodes=true
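The comments in the ClusterRole above note that the node and pod permissions are coupled to what the bundled kubectl needs for drain. After applying this manifest, a quick hypothetical spot check (not part of the PR) is to impersonate the kured ServiceAccount and ask the API server about one of the granted verbs; "k" is the kubectl wrapper the e2e suite already shells out to earlier in this diff.

package main

import (
	"fmt"
	"os/exec"
)

func main() {
	// Ask the API server whether the kured ServiceAccount may patch nodes, one of the
	// verbs granted above. Requires a kubeconfig allowed to impersonate service accounts.
	cmd := exec.Command("k", "auth", "can-i", "patch", "nodes",
		"--as", "system:serviceaccount:kube-system:kured")
	out, err := cmd.CombinedOutput()
	fmt.Printf("%s", out) // expect "yes" once the RBAC objects exist
	if err != nil {
		// kubectl exits non-zero when the answer is "no", so an error here is informative
		fmt.Println("can-i reported:", err)
	}
}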