From 34301776e7a865963d5dc503301fa1e34c70ba6a Mon Sep 17 00:00:00 2001
From: Francesco Romani
Date: Tue, 1 Dec 2020 19:10:38 +0100
Subject: [PATCH 1/4] test: e2e: configurable pull policy

In some cases (CI) it is useful to run NFD e2e tests using ephemeral
clusters. To save time and bandwidth, it is also useful to prime the
ephemeral cluster with the images under test. In these circumstances
there is no risk of running a stale image, and a hardcoded `Always`
pull policy renders the whole exercise pointless. So we add a new
option, disabled by default, to make the e2e manifests use the
`IfNotPresent` pull policy, to cover this use case.

Signed-off-by: Francesco Romani
---
 Makefile                           |  7 +++++--
 test/e2e/node_feature_discovery.go | 20 ++++++++++++++------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/Makefile b/Makefile
index 0d9cb62173..9d3e4dd998 100644
--- a/Makefile
+++ b/Makefile
@@ -54,6 +54,7 @@ HOSTMOUNT_PREFIX ?= /
 KUBECONFIG ?=
 E2E_TEST_CONFIG ?=
+PULL_IF_NOT_PRESENT ?=
 
 LDFLAGS = -ldflags "-s -w -X sigs.k8s.io/node-feature-discovery/pkg/version.version=$(VERSION) -X sigs.k8s.io/node-feature-discovery/source.pathPrefix=$(HOSTMOUNT_PREFIX)"
 
@@ -131,10 +132,12 @@ test:
 e2e-test:
 	@if [ -z ${KUBECONFIG} ]; then echo "[ERR] KUBECONFIG missing, must be defined"; exit 1; fi
 	$(GO_CMD) test -v ./test/e2e/ -args -nfd.repo=$(IMAGE_REPO) -nfd.tag=$(IMAGE_TAG_NAME) \
-		-kubeconfig=$(KUBECONFIG) -nfd.e2e-config=$(E2E_TEST_CONFIG) -ginkgo.focus="\[kubernetes-sigs\]" \
+		-kubeconfig=$(KUBECONFIG) -nfd.e2e-config=$(E2E_TEST_CONFIG) -nfd.pull-if-not-present=$(PULL_IF_NOT_PRESENT) \
+		-ginkgo.focus="\[kubernetes-sigs\]" \
 		$(if $(OPENSHIFT),-nfd.openshift,)
 	$(GO_CMD) test -v ./test/e2e/ -args -nfd.repo=$(IMAGE_REPO) -nfd.tag=$(IMAGE_TAG_NAME)-minimal \
-		-kubeconfig=$(KUBECONFIG) -nfd.e2e-config=$(E2E_TEST_CONFIG) -ginkgo.focus="\[kubernetes-sigs\]" \
+		-kubeconfig=$(KUBECONFIG) -nfd.e2e-config=$(E2E_TEST_CONFIG) -nfd.pull-if-not-present=$(PULL_IF_NOT_PRESENT) \
+		-ginkgo.focus="\[kubernetes-sigs\]" \
 		$(if $(OPENSHIFT),-nfd.openshift,)
 
 push:
diff --git a/test/e2e/node_feature_discovery.go b/test/e2e/node_feature_discovery.go
index 25c36dd11a..f085e9ac99 100644
--- a/test/e2e/node_feature_discovery.go
+++ b/test/e2e/node_feature_discovery.go
@@ -46,10 +46,11 @@ import (
 )
 
 var (
-	dockerRepo    = flag.String("nfd.repo", "gcr.io/k8s-staging-nfd/node-feature-discovery", "Docker repository to fetch image from")
-	dockerTag     = flag.String("nfd.tag", "master", "Docker tag to use")
-	e2eConfigFile = flag.String("nfd.e2e-config", "", "Configuration parameters for end-to-end tests")
-	openShift     = flag.Bool("nfd.openshift", false, "Enable OpenShift specific bits")
+	dockerRepo       = flag.String("nfd.repo", "gcr.io/k8s-staging-nfd/node-feature-discovery", "Docker repository to fetch image from")
+	dockerTag        = flag.String("nfd.tag", "master", "Docker tag to use")
+	e2eConfigFile    = flag.String("nfd.e2e-config", "", "Configuration parameters for end-to-end tests")
+	openShift        = flag.Bool("nfd.openshift", false, "Enable OpenShift specific bits")
+	pullIfNotPresent = flag.Bool("nfd.pull-if-not-present", false, "Pull Images if not present - not always")
 
 	conf *e2eConfig
 )
@@ -238,7 +239,7 @@ func nfdMasterPod(image string, onMasterNode bool) *v1.Pod {
 			{
 				Name:            "node-feature-discovery",
 				Image:           image,
-				ImagePullPolicy: v1.PullAlways,
+				ImagePullPolicy: pullPolicy(),
 				Command:         []string{"nfd-master"},
 				Env: []v1.EnvVar{
 					{
@@ -309,7 +310,7 @@ func nfdWorkerPodSpec(image string, extraArgs []string)
v1.PodSpec { { Name: "node-feature-discovery", Image: image, - ImagePullPolicy: v1.PullAlways, + ImagePullPolicy: pullPolicy(), Command: []string{"nfd-worker"}, Args: append([]string{"--server=nfd-master-e2e:8080"}, extraArgs...), Env: []v1.EnvVar{ @@ -800,3 +801,10 @@ var _ = SIGDescribe("Node Feature Discovery", func() { }) }) + +func pullPolicy() v1.PullPolicy { + if *pullIfNotPresent { + return v1.PullIfNotPresent + } + return v1.PullAlways +} From 0983f2a72df890d4bf96953b57f610b296a50bbd Mon Sep 17 00:00:00 2001 From: Artyom Lukianov Date: Mon, 23 Nov 2020 18:37:42 +0200 Subject: [PATCH 2/4] Move helpers method to the separate package Signed-off-by: Artyom Lukianov --- go.mod | 1 + test/e2e/node_feature_discovery.go | 416 ++--------------------------- test/e2e/utils/config.go | 104 ++++++++ test/e2e/utils/pod.go | 199 ++++++++++++++ test/e2e/utils/rbac.go | 138 ++++++++++ test/e2e/utils/service.go | 45 ++++ 6 files changed, 506 insertions(+), 397 deletions(-) create mode 100644 test/e2e/utils/config.go create mode 100644 test/e2e/utils/pod.go create mode 100644 test/e2e/utils/rbac.go create mode 100644 test/e2e/utils/service.go diff --git a/go.mod b/go.mod index 39d434d364..dcc030b255 100644 --- a/go.mod +++ b/go.mod @@ -20,6 +20,7 @@ require ( golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c // indirect google.golang.org/grpc v1.38.0 google.golang.org/protobuf v1.27.1 + gopkg.in/yaml.v2 v2.4.0 k8s.io/api v0.22.0 k8s.io/apimachinery v0.22.0 k8s.io/client-go v0.22.0 diff --git a/test/e2e/node_feature_discovery.go b/test/e2e/node_feature_discovery.go index f085e9ac99..862fb8570a 100644 --- a/test/e2e/node_feature_discovery.go +++ b/test/e2e/node_feature_discovery.go @@ -20,18 +20,14 @@ import ( "context" "flag" "fmt" - "io/ioutil" "path/filepath" - "regexp" "strings" "time" . "github.com/onsi/ginkgo" . 
"github.com/onsi/gomega" - appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" - rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/uuid" clientset "k8s.io/client-go/kubernetes" @@ -42,375 +38,14 @@ import ( master "sigs.k8s.io/node-feature-discovery/pkg/nfd-master" "sigs.k8s.io/node-feature-discovery/source/custom" - "sigs.k8s.io/yaml" + testutils "sigs.k8s.io/node-feature-discovery/test/e2e/utils" ) var ( - dockerRepo = flag.String("nfd.repo", "gcr.io/k8s-staging-nfd/node-feature-discovery", "Docker repository to fetch image from") - dockerTag = flag.String("nfd.tag", "master", "Docker tag to use") - e2eConfigFile = flag.String("nfd.e2e-config", "", "Configuration parameters for end-to-end tests") - openShift = flag.Bool("nfd.openshift", false, "Enable OpenShift specific bits") - pullIfNotPresent = flag.Bool("nfd.pull-if-not-present", false, "Pull Images if not present - not always") - - conf *e2eConfig + dockerRepo = flag.String("nfd.repo", "gcr.io/k8s-staging-nfd/node-feature-discovery", "Docker repository to fetch image from") + dockerTag = flag.String("nfd.tag", "master", "Docker tag to use") ) -type e2eConfig struct { - DefaultFeatures *struct { - LabelWhitelist lookupMap - AnnotationWhitelist lookupMap - Nodes map[string]nodeConfig - } -} - -type nodeConfig struct { - nameRe *regexp.Regexp - ExpectedLabelValues map[string]string - ExpectedLabelKeys lookupMap - ExpectedAnnotationValues map[string]string - ExpectedAnnotationKeys lookupMap -} - -type lookupMap map[string]struct{} - -func (l *lookupMap) UnmarshalJSON(data []byte) error { - *l = lookupMap{} - slice := []string{} - - err := yaml.Unmarshal(data, &slice) - if err != nil { - return err - } - - for _, k := range slice { - (*l)[k] = struct{}{} - } - return nil -} - -func readConfig() { - // Read and parse only once - if conf != nil || *e2eConfigFile == "" { - return - } - - By("Reading end-to-end test configuration file") - data, err := ioutil.ReadFile(*e2eConfigFile) - Expect(err).NotTo(HaveOccurred()) - - By("Parsing end-to-end test configuration data") - err = yaml.Unmarshal(data, &conf) - Expect(err).NotTo(HaveOccurred()) - - // Pre-compile node name matching regexps - for name, nodeConf := range conf.DefaultFeatures.Nodes { - nodeConf.nameRe, err = regexp.Compile(name) - Expect(err).NotTo(HaveOccurred()) - conf.DefaultFeatures.Nodes[name] = nodeConf - } -} - -// Create required RBAC configuration -func configureRBAC(cs clientset.Interface, ns string) error { - _, err := createServiceAccount(cs, ns) - if err != nil { - return err - } - - _, err = createClusterRole(cs) - if err != nil { - return err - } - - _, err = createClusterRoleBinding(cs, ns) - if err != nil { - return err - } - - return nil -} - -// Remove RBAC configuration -func deconfigureRBAC(cs clientset.Interface, ns string) error { - err := cs.RbacV1().ClusterRoleBindings().Delete(context.TODO(), "nfd-master-e2e", metav1.DeleteOptions{}) - if err != nil { - return err - } - err = cs.RbacV1().ClusterRoles().Delete(context.TODO(), "nfd-master-e2e", metav1.DeleteOptions{}) - if err != nil { - return err - } - err = cs.CoreV1().ServiceAccounts(ns).Delete(context.TODO(), "nfd-master-e2e", metav1.DeleteOptions{}) - if err != nil { - return err - } - return nil -} - -// Configure service account required by NFD -func createServiceAccount(cs clientset.Interface, ns string) (*v1.ServiceAccount, error) { - sa := &v1.ServiceAccount{ - ObjectMeta: metav1.ObjectMeta{ - Name: "nfd-master-e2e", - Namespace: ns, - }, - } - 
return cs.CoreV1().ServiceAccounts(ns).Create(context.TODO(), sa, metav1.CreateOptions{}) -} - -// Configure cluster role required by NFD -func createClusterRole(cs clientset.Interface) (*rbacv1.ClusterRole, error) { - cr := &rbacv1.ClusterRole{ - ObjectMeta: metav1.ObjectMeta{ - Name: "nfd-master-e2e", - }, - Rules: []rbacv1.PolicyRule{ - { - APIGroups: []string{""}, - Resources: []string{"nodes"}, - Verbs: []string{"get", "patch", "update"}, - }, - }, - } - if *openShift { - cr.Rules = append(cr.Rules, - rbacv1.PolicyRule{ - // needed on OpenShift clusters - APIGroups: []string{"security.openshift.io"}, - Resources: []string{"securitycontextconstraints"}, - ResourceNames: []string{"hostaccess"}, - Verbs: []string{"use"}, - }) - } - return cs.RbacV1().ClusterRoles().Update(context.TODO(), cr, metav1.UpdateOptions{}) -} - -// Configure cluster role binding required by NFD -func createClusterRoleBinding(cs clientset.Interface, ns string) (*rbacv1.ClusterRoleBinding, error) { - crb := &rbacv1.ClusterRoleBinding{ - ObjectMeta: metav1.ObjectMeta{ - Name: "nfd-master-e2e", - }, - Subjects: []rbacv1.Subject{ - { - Kind: rbacv1.ServiceAccountKind, - Name: "nfd-master-e2e", - Namespace: ns, - }, - }, - RoleRef: rbacv1.RoleRef{ - APIGroup: rbacv1.GroupName, - Kind: "ClusterRole", - Name: "nfd-master-e2e", - }, - } - - return cs.RbacV1().ClusterRoleBindings().Update(context.TODO(), crb, metav1.UpdateOptions{}) -} - -// createService creates nfd-master Service -func createService(cs clientset.Interface, ns string) (*v1.Service, error) { - svc := &v1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: "nfd-master-e2e", - }, - Spec: v1.ServiceSpec{ - Selector: map[string]string{"name": "nfd-master-e2e"}, - Ports: []v1.ServicePort{ - { - Protocol: v1.ProtocolTCP, - Port: 8080, - }, - }, - Type: v1.ServiceTypeClusterIP, - }, - } - return cs.CoreV1().Services(ns).Create(context.TODO(), svc, metav1.CreateOptions{}) -} - -func nfdMasterPod(image string, onMasterNode bool) *v1.Pod { - p := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "nfd-master-" + string(uuid.NewUUID()), - Labels: map[string]string{"name": "nfd-master-e2e"}, - }, - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Name: "node-feature-discovery", - Image: image, - ImagePullPolicy: pullPolicy(), - Command: []string{"nfd-master"}, - Env: []v1.EnvVar{ - { - Name: "NODE_NAME", - ValueFrom: &v1.EnvVarSource{ - FieldRef: &v1.ObjectFieldSelector{ - FieldPath: "spec.nodeName", - }, - }, - }, - }, - }, - }, - ServiceAccountName: "nfd-master-e2e", - RestartPolicy: v1.RestartPolicyNever, - }, - } - if onMasterNode { - p.Spec.NodeSelector = map[string]string{"node-role.kubernetes.io/master": ""} - p.Spec.Tolerations = []v1.Toleration{ - { - Key: "node-role.kubernetes.io/master", - Operator: v1.TolerationOpEqual, - Value: "", - Effect: v1.TaintEffectNoSchedule, - }, - } - } - return p -} - -func nfdWorkerPod(image string, extraArgs []string) *v1.Pod { - p := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "nfd-worker-" + string(uuid.NewUUID()), - }, - Spec: nfdWorkerPodSpec(image, extraArgs), - } - - p.Spec.RestartPolicy = v1.RestartPolicyNever - - return p -} - -func nfdWorkerDaemonSet(image string, extraArgs []string) *appsv1.DaemonSet { - return &appsv1.DaemonSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "nfd-worker-" + string(uuid.NewUUID()), - }, - Spec: appsv1.DaemonSetSpec{ - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"name": "nfd-worker"}, - }, - Template: v1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - 
Labels: map[string]string{"name": "nfd-worker"}, - }, - Spec: nfdWorkerPodSpec(image, extraArgs), - }, - MinReadySeconds: 5, - }, - } -} - -func nfdWorkerPodSpec(image string, extraArgs []string) v1.PodSpec { - return v1.PodSpec{ - Containers: []v1.Container{ - { - Name: "node-feature-discovery", - Image: image, - ImagePullPolicy: pullPolicy(), - Command: []string{"nfd-worker"}, - Args: append([]string{"--server=nfd-master-e2e:8080"}, extraArgs...), - Env: []v1.EnvVar{ - { - Name: "NODE_NAME", - ValueFrom: &v1.EnvVarSource{ - FieldRef: &v1.ObjectFieldSelector{ - FieldPath: "spec.nodeName", - }, - }, - }, - }, - VolumeMounts: []v1.VolumeMount{ - { - Name: "host-boot", - MountPath: "/host-boot", - ReadOnly: true, - }, - { - Name: "host-os-release", - MountPath: "/host-etc/os-release", - ReadOnly: true, - }, - { - Name: "host-sys", - MountPath: "/host-sys", - ReadOnly: true, - }, - { - Name: "host-usr-lib", - MountPath: "/host-usr/lib", - ReadOnly: true, - }, - { - Name: "host-usr-src", - MountPath: "/host-usr/src", - ReadOnly: true, - }, - }, - }, - }, - ServiceAccountName: "nfd-master-e2e", - DNSPolicy: v1.DNSClusterFirstWithHostNet, - Volumes: []v1.Volume{ - { - Name: "host-boot", - VolumeSource: v1.VolumeSource{ - HostPath: &v1.HostPathVolumeSource{ - Path: "/boot", - Type: newHostPathType(v1.HostPathDirectory), - }, - }, - }, - { - Name: "host-os-release", - VolumeSource: v1.VolumeSource{ - HostPath: &v1.HostPathVolumeSource{ - Path: "/etc/os-release", - Type: newHostPathType(v1.HostPathFile), - }, - }, - }, - { - Name: "host-sys", - VolumeSource: v1.VolumeSource{ - HostPath: &v1.HostPathVolumeSource{ - Path: "/sys", - Type: newHostPathType(v1.HostPathDirectory), - }, - }, - }, - { - Name: "host-usr-lib", - VolumeSource: v1.VolumeSource{ - HostPath: &v1.HostPathVolumeSource{ - Path: "/usr/lib", - Type: newHostPathType(v1.HostPathDirectory), - }, - }, - }, - { - Name: "host-usr-src", - VolumeSource: v1.VolumeSource{ - HostPath: &v1.HostPathVolumeSource{ - Path: "/usr/src", - Type: newHostPathType(v1.HostPathDirectory), - }, - }, - }, - }, - } - -} - -func newHostPathType(typ v1.HostPathType) *v1.HostPathType { - hostPathType := new(v1.HostPathType) - *hostPathType = v1.HostPathType(typ) - return hostPathType -} - // cleanupNode deletes all NFD-related metadata from the Node object, i.e. 
// labels and annotations func cleanupNode(cs clientset.Interface) { @@ -466,18 +101,17 @@ var _ = SIGDescribe("Node Feature Discovery", func() { var masterPod *v1.Pod BeforeEach(func() { - err := configureRBAC(f.ClientSet, f.Namespace.Name) + err := testutils.ConfigureRBAC(f.ClientSet, f.Namespace.Name) Expect(err).NotTo(HaveOccurred()) // Launch nfd-master By("Creating nfd master pod and nfd-master service") image := fmt.Sprintf("%s:%s", *dockerRepo, *dockerTag) - masterPod = nfdMasterPod(image, false) - masterPod, err = f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(context.TODO(), masterPod, metav1.CreateOptions{}) - Expect(err).NotTo(HaveOccurred()) + masterPod = f.PodClient().CreateSync(testutils.NFDMasterPod(image, false)) + // Create nfd-master service // Create nfd-master service - nfdSvc, err := createService(f.ClientSet, f.Namespace.Name) + nfdSvc, err := testutils.CreateService(f.ClientSet, f.Namespace.Name) Expect(err).NotTo(HaveOccurred()) By("Waiting for the nfd-master pod to be running") @@ -488,7 +122,7 @@ var _ = SIGDescribe("Node Feature Discovery", func() { }) AfterEach(func() { - err := deconfigureRBAC(f.ClientSet, f.Namespace.Name) + err := testutils.DeconfigureRBAC(f.ClientSet, f.Namespace.Name) Expect(err).NotTo(HaveOccurred()) }) @@ -511,7 +145,7 @@ var _ = SIGDescribe("Node Feature Discovery", func() { // Launch nfd-worker By("Creating a nfd worker pod") image := fmt.Sprintf("%s:%s", *dockerRepo, *dockerTag) - workerPod := nfdWorkerPod(image, []string{"--oneshot", "--sources=fake"}) + workerPod := testutils.NFDWorkerPod(image, []string{"--oneshot", "--sources=fake"}) workerPod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(context.TODO(), workerPod, metav1.CreateOptions{}) Expect(err).NotTo(HaveOccurred()) @@ -547,21 +181,23 @@ var _ = SIGDescribe("Node Feature Discovery", func() { // Context("and nfd-workers as a daemonset with default sources enabled", func() { It("the node labels and annotations listed in the e2e config should be present", func() { - readConfig() - if conf == nil { + err := testutils.ReadConfig() + Expect(err).ToNot(HaveOccurred()) + + if testutils.E2EConfigFile == nil { Skip("no e2e-config was specified") } - if conf.DefaultFeatures == nil { + if testutils.E2EConfigFile.DefaultFeatures == nil { Skip("no 'defaultFeatures' specified in e2e-config") } - fConf := conf.DefaultFeatures + fConf := testutils.E2EConfigFile.DefaultFeatures // Remove pre-existing stale annotations and labels cleanupNode(f.ClientSet) By("Creating nfd-worker daemonset") - workerDS := nfdWorkerDaemonSet(fmt.Sprintf("%s:%s", *dockerRepo, *dockerTag), []string{}) - workerDS, err := f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Create(context.TODO(), workerDS, metav1.CreateOptions{}) + workerDS := testutils.NFDWorkerDaemonSet(fmt.Sprintf("%s:%s", *dockerRepo, *dockerTag), []string{}) + workerDS, err = f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Create(context.TODO(), workerDS, metav1.CreateOptions{}) Expect(err).NotTo(HaveOccurred()) By("Waiting for daemonset pods to be ready") @@ -572,14 +208,7 @@ var _ = SIGDescribe("Node Feature Discovery", func() { Expect(err).NotTo(HaveOccurred()) for _, node := range nodeList.Items { - var nodeConf *nodeConfig - for _, conf := range fConf.Nodes { - if conf.nameRe.MatchString(node.Name) { - e2elog.Logf("node %q matches rule %q", node.Name, conf.nameRe) - nodeConf = &conf - break - } - } + nodeConf := testutils.FindNodeConfig(node.Name) if nodeConf == nil { e2elog.Logf("node %q has no matching rule in e2e-config, 
skipping...", node.Name) continue @@ -717,7 +346,7 @@ var _ = SIGDescribe("Node Feature Discovery", func() { Expect(err).NotTo(HaveOccurred()) By("Creating nfd-worker daemonset with configmap mounted") - workerDS := nfdWorkerDaemonSet(fmt.Sprintf("%s:%s", *dockerRepo, *dockerTag), []string{}) + workerDS := testutils.NFDWorkerDaemonSet(fmt.Sprintf("%s:%s", *dockerRepo, *dockerTag), []string{}) // add configmap mount config volumeName1 := "custom-configs-extra1" @@ -801,10 +430,3 @@ var _ = SIGDescribe("Node Feature Discovery", func() { }) }) - -func pullPolicy() v1.PullPolicy { - if *pullIfNotPresent { - return v1.PullIfNotPresent - } - return v1.PullAlways -} diff --git a/test/e2e/utils/config.go b/test/e2e/utils/config.go new file mode 100644 index 0000000000..57fdec91c1 --- /dev/null +++ b/test/e2e/utils/config.go @@ -0,0 +1,104 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils + +import ( + "flag" + "io/ioutil" + "regexp" + + e2elog "k8s.io/kubernetes/test/e2e/framework/log" + + "gopkg.in/yaml.v2" +) + +var ( + configContent = flag.String("nfd.e2e-config", "", "Configuration parameters for end-to-end tests") + E2EConfigFile *e2eConfig +) + +type e2eConfig struct { + DefaultFeatures *struct { + LabelWhitelist lookupMap + AnnotationWhitelist lookupMap + Nodes map[string]NodeConfig + } +} + +type NodeConfig struct { + ExpectedLabelValues map[string]string + ExpectedLabelKeys lookupMap + ExpectedAnnotationValues map[string]string + ExpectedAnnotationKeys lookupMap + + nameRe *regexp.Regexp +} + +type lookupMap map[string]struct{} + +func (l *lookupMap) UnmarshalJSON(data []byte) error { + *l = lookupMap{} + + var slice []string + if err := yaml.Unmarshal(data, &slice); err != nil { + return err + } + + for _, k := range slice { + (*l)[k] = struct{}{} + } + return nil +} + +func ReadConfig() error { + // Read and parse only once + if E2EConfigFile != nil || *configContent == "" { + return nil + } + + data, err := ioutil.ReadFile(*configContent) + if err != nil { + return err + } + + if err := yaml.Unmarshal(data, E2EConfigFile); err != nil { + return err + } + + // Pre-compile node name matching regexps + for name, nodeConf := range E2EConfigFile.DefaultFeatures.Nodes { + nodeConf.nameRe, err = regexp.Compile(name) + if err != nil { + return err + } + E2EConfigFile.DefaultFeatures.Nodes[name] = nodeConf + } + + return nil +} + +func FindNodeConfig(nodeName string) *NodeConfig { + var nodeConf *NodeConfig + for _, conf := range E2EConfigFile.DefaultFeatures.Nodes { + if conf.nameRe.MatchString(nodeName) { + e2elog.Logf("node %q matches rule %q", nodeName, conf.nameRe) + nodeConf = &conf + break + } + } + return nodeConf +} diff --git a/test/e2e/utils/pod.go b/test/e2e/utils/pod.go new file mode 100644 index 0000000000..cd17fba0d6 --- /dev/null +++ b/test/e2e/utils/pod.go @@ -0,0 +1,199 @@ +/* +Copyright 2020 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils + +import ( + "flag" + + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/uuid" +) + +var pullIfNotPresent = flag.Bool("nfd.pull-if-not-present", false, "Pull Images if not present - not always") + +// NFDMasterPod provide NFD master pod definition +func NFDMasterPod(image string, onMasterNode bool) *v1.Pod { + p := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "nfd-master-", + Labels: map[string]string{"name": "nfd-master-e2e"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "node-feature-discovery", + Image: image, + ImagePullPolicy: pullPolicy(), + Command: []string{"nfd-master"}, + Env: []v1.EnvVar{ + { + Name: "NODE_NAME", + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + FieldPath: "spec.nodeName", + }, + }, + }, + }, + }, + }, + ServiceAccountName: "nfd-master-e2e", + RestartPolicy: v1.RestartPolicyNever, + }, + } + if onMasterNode { + p.Spec.NodeSelector = map[string]string{"node-role.kubernetes.io/master": ""} + p.Spec.Tolerations = []v1.Toleration{ + { + Key: "node-role.kubernetes.io/master", + Operator: v1.TolerationOpEqual, + Value: "", + Effect: v1.TaintEffectNoSchedule, + }, + } + } + return p +} + +// NFDWorkerPod provides NFD worker pod definition +func NFDWorkerPod(image string, extraArgs []string) *v1.Pod { + p := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "nfd-worker-" + string(uuid.NewUUID()), + }, + Spec: *nfdWorkerPodSpec(image, extraArgs), + } + + p.Spec.RestartPolicy = v1.RestartPolicyNever + + return p +} + +// NFDWorkerDaemonSet provides the NFD daemon set worker definition +func NFDWorkerDaemonSet(image string, extraArgs []string) *appsv1.DaemonSet { + podSpec := nfdWorkerPodSpec(image, extraArgs) + return newDaemonSet("nfd-worker", podSpec) +} + +// newDaemonSet provide the new daemon set +func newDaemonSet(name string, podSpec *v1.PodSpec) *appsv1.DaemonSet { + return &appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: name + "-" + string(uuid.NewUUID()), + }, + Spec: appsv1.DaemonSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"name": name}, + }, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"name": name}, + }, + Spec: *podSpec, + }, + MinReadySeconds: 5, + }, + } +} + +func nfdWorkerPodSpec(image string, extraArgs []string) *v1.PodSpec { + return &v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "node-feature-discovery", + Image: image, + ImagePullPolicy: pullPolicy(), + Command: []string{"nfd-worker"}, + Args: append([]string{"--server=nfd-master-e2e:8080"}, extraArgs...), + Env: []v1.EnvVar{ + { + Name: "NODE_NAME", + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + FieldPath: "spec.nodeName", + }, + }, + }, + }, + VolumeMounts: []v1.VolumeMount{ + { + Name: "host-boot", + MountPath: "/host-boot", + ReadOnly: true, + }, + { + Name: "host-os-release", + MountPath: 
"/host-etc/os-release", + ReadOnly: true, + }, + { + Name: "host-sys", + MountPath: "/host-sys", + ReadOnly: true, + }, + }, + }, + }, + ServiceAccountName: "nfd-master-e2e", + DNSPolicy: v1.DNSClusterFirstWithHostNet, + Volumes: []v1.Volume{ + { + Name: "host-boot", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/boot", + Type: newHostPathType(v1.HostPathDirectory), + }, + }, + }, + { + Name: "host-os-release", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/etc/os-release", + Type: newHostPathType(v1.HostPathFile), + }, + }, + }, + { + Name: "host-sys", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/sys", + Type: newHostPathType(v1.HostPathDirectory), + }, + }, + }, + }, + } + +} + +func newHostPathType(typ v1.HostPathType) *v1.HostPathType { + hostPathType := new(v1.HostPathType) + *hostPathType = typ + return hostPathType +} + +func pullPolicy() v1.PullPolicy { + if *pullIfNotPresent { + return v1.PullIfNotPresent + } + return v1.PullAlways +} diff --git a/test/e2e/utils/rbac.go b/test/e2e/utils/rbac.go new file mode 100644 index 0000000000..157e5600ff --- /dev/null +++ b/test/e2e/utils/rbac.go @@ -0,0 +1,138 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package utils + +import ( + "context" + "flag" + + v1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + clientset "k8s.io/client-go/kubernetes" +) + +var ( + openShift = flag.Bool("nfd.openshift", false, "Enable OpenShift specific bits") +) + +// ConfigureRBAC creates required RBAC configuration +func ConfigureRBAC(cs clientset.Interface, ns string) error { + _, err := createServiceAccount(cs, ns) + if err != nil { + return err + } + + _, err = createClusterRole(cs) + if err != nil { + return err + } + + _, err = createClusterRoleBinding(cs, ns) + if err != nil { + return err + } + + return nil +} + +// DeconfigureRBAC removes RBAC configuration +func DeconfigureRBAC(cs clientset.Interface, ns string) error { + err := cs.RbacV1().ClusterRoleBindings().Delete(context.TODO(), "nfd-master-e2e", metav1.DeleteOptions{}) + if err != nil { + return err + } + err = cs.RbacV1().ClusterRoles().Delete(context.TODO(), "nfd-master-e2e", metav1.DeleteOptions{}) + if err != nil { + return err + } + err = cs.CoreV1().ServiceAccounts(ns).Delete(context.TODO(), "nfd-master-e2e", metav1.DeleteOptions{}) + if err != nil { + return err + } + return nil +} + +// Configure service account required by NFD +func createServiceAccount(cs clientset.Interface, ns string) (*v1.ServiceAccount, error) { + sa := &v1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "nfd-master-e2e", + Namespace: ns, + }, + } + return cs.CoreV1().ServiceAccounts(ns).Create(context.TODO(), sa, metav1.CreateOptions{}) +} + +// Configure cluster role required by NFD +func createClusterRole(cs clientset.Interface) (*rbacv1.ClusterRole, error) { + cr := &rbacv1.ClusterRole{ + ObjectMeta: metav1.ObjectMeta{ + Name: "nfd-master-e2e", + }, + Rules: []rbacv1.PolicyRule{ + { + APIGroups: []string{""}, + Resources: []string{"nodes"}, + Verbs: []string{"get", "patch", "update"}, + }, + { + APIGroups: []string{"topology.node.k8s.io"}, + Resources: []string{"noderesourcetopologies"}, + Verbs: []string{ + "create", + "get", + "update", + }, + }, + }, + } + if *openShift { + cr.Rules = append(cr.Rules, + rbacv1.PolicyRule{ + // needed on OpenShift clusters + APIGroups: []string{"security.openshift.io"}, + Resources: []string{"securitycontextconstraints"}, + ResourceNames: []string{"hostaccess"}, + Verbs: []string{"use"}, + }) + } + return cs.RbacV1().ClusterRoles().Update(context.TODO(), cr, metav1.UpdateOptions{}) +} + +// Configure cluster role binding required by NFD +func createClusterRoleBinding(cs clientset.Interface, ns string) (*rbacv1.ClusterRoleBinding, error) { + crb := &rbacv1.ClusterRoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "nfd-master-e2e", + }, + Subjects: []rbacv1.Subject{ + { + Kind: rbacv1.ServiceAccountKind, + Name: "nfd-master-e2e", + Namespace: ns, + }, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: rbacv1.GroupName, + Kind: "ClusterRole", + Name: "nfd-master-e2e", + }, + } + + return cs.RbacV1().ClusterRoleBindings().Update(context.TODO(), crb, metav1.UpdateOptions{}) +} diff --git a/test/e2e/utils/service.go b/test/e2e/utils/service.go new file mode 100644 index 0000000000..1ea3b08d45 --- /dev/null +++ b/test/e2e/utils/service.go @@ -0,0 +1,45 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils + +import ( + "context" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + clientset "k8s.io/client-go/kubernetes" +) + +// CreateService creates nfd-master Service +func CreateService(cs clientset.Interface, ns string) (*v1.Service, error) { + svc := &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "nfd-master-e2e", + }, + Spec: v1.ServiceSpec{ + Selector: map[string]string{"name": "nfd-master-e2e"}, + Ports: []v1.ServicePort{ + { + Protocol: v1.ProtocolTCP, + Port: 8080, + }, + }, + Type: v1.ServiceTypeClusterIP, + }, + } + return cs.CoreV1().Services(ns).Create(context.TODO(), svc, metav1.CreateOptions{}) +} From b0ad62cbc218cda9ea5de134a67dd626cae3301f Mon Sep 17 00:00:00 2001 From: Artyom Lukianov Date: Mon, 7 Dec 2020 18:09:51 +0200 Subject: [PATCH 3/4] e2e: add basic topology updater test Co-authored-by: Swati Sehgal Co-authored-by: Francesco Romani Signed-off-by: Artyom Lukianov --- go.mod | 2 + test/e2e/topology_updater.go | 469 +++++++++++++++++++++++++++++++++++ test/e2e/utils/node.go | 87 +++++++ test/e2e/utils/pod.go | 166 +++++++++++++ 4 files changed, 724 insertions(+) create mode 100644 test/e2e/topology_updater.go create mode 100644 test/e2e/utils/node.go diff --git a/go.mod b/go.mod index dcc030b255..e4b2ce5949 100644 --- a/go.mod +++ b/go.mod @@ -22,11 +22,13 @@ require ( google.golang.org/protobuf v1.27.1 gopkg.in/yaml.v2 v2.4.0 k8s.io/api v0.22.0 + k8s.io/apiextensions-apiserver v0.22.0 k8s.io/apimachinery v0.22.0 k8s.io/client-go v0.22.0 k8s.io/klog/v2 v2.9.0 k8s.io/kubelet v0.0.0 k8s.io/kubernetes v1.22.0 + k8s.io/utils v0.0.0-20210707171843-4b05e18ac7d9 sigs.k8s.io/yaml v1.2.0 ) diff --git a/test/e2e/topology_updater.go b/test/e2e/topology_updater.go new file mode 100644 index 0000000000..4e7c8e185e --- /dev/null +++ b/test/e2e/topology_updater.go @@ -0,0 +1,469 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package e2e + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1" + topologyclientset "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/clientset/versioned" + "github.com/onsi/ginkgo" + "github.com/onsi/gomega" + + v1 "k8s.io/api/core/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + extclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" + "k8s.io/kubernetes/test/e2e/framework" + e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet" + e2enetwork "k8s.io/kubernetes/test/e2e/framework/network" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" + + "sigs.k8s.io/node-feature-discovery/test/e2e/utils" + testutils "sigs.k8s.io/node-feature-discovery/test/e2e/utils" +) + +var _ = ginkgo.Describe("[kubernetes-sigs] Node topology updater", func() { + var ( + extClient *extclient.Clientset + topologyClient *topologyclientset.Clientset + crd *apiextensionsv1.CustomResourceDefinition + topologyUpdaterNode *v1.Node + workerNodes []v1.Node + kubeletConfig *kubeletconfig.KubeletConfiguration + namespace string + ) + + f := framework.NewDefaultFramework("node-topology-updater") + + ginkgo.BeforeEach(func() { + var err error + + if extClient == nil { + extClient, err = extclient.NewForConfig(f.ClientConfig()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } + + if topologyClient == nil { + topologyClient, err = topologyclientset.NewForConfig(f.ClientConfig()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } + + ginkgo.By("Creating the node resource topologies CRD") + crd, err = CreateNodeResourceTopologies(extClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = testutils.ConfigureRBAC(f.ClientSet, f.Namespace.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + image := fmt.Sprintf("%s:%s", *dockerRepo, *dockerTag) + f.PodClient().CreateSync(testutils.NFDMasterPod(image, false)) + + // Create nfd-master service + masterService, err := testutils.CreateService(f.ClientSet, f.Namespace.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Waiting for the nfd-master service to be up") + gomega.Expect(e2enetwork.WaitForService(f.ClientSet, f.Namespace.Name, masterService.Name, true, time.Second, 10*time.Second)).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Creating nfd-topology-updater daemonset") + topologyUpdaterDaemonSet := testutils.NFDTopologyUpdaterDaemonSet(fmt.Sprintf("%s:%s", *dockerRepo, *dockerTag), []string{}) + topologyUpdaterDaemonSet, err = f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Create(context.TODO(), topologyUpdaterDaemonSet, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Waiting for daemonset pods to be ready") + gomega.Expect(e2epod.WaitForPodsReady(f.ClientSet, f.Namespace.Name, topologyUpdaterDaemonSet.Spec.Template.Labels["name"], 5)).NotTo(gomega.HaveOccurred()) + + label := labels.SelectorFromSet(map[string]string{"name": topologyUpdaterDaemonSet.Spec.Template.Labels["name"]}) + pods, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(context.TODO(), metav1.ListOptions{LabelSelector: label.String()}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(pods.Items).ToNot(gomega.BeEmpty()) + + topologyUpdaterNode, err 
= f.ClientSet.CoreV1().Nodes().Get(context.TODO(), pods.Items[0].Spec.NodeName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + kubeletConfig, err = e2ekubelet.GetCurrentKubeletConfig(topologyUpdaterNode.Name, "", true) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + workerNodes, err = utils.GetWorkerNodes(f) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + namespace = f.Namespace.Name + + }) + + ginkgo.Context("with single nfd-master pod", func() { + ginkgo.It("should fill the node resource topologies CR with the data", func() { + nodeTopology := getNodeTopology(topologyClient, topologyUpdaterNode.Name, namespace) + isValid := isValidNodeTopology(nodeTopology, kubeletConfig) + gomega.Expect(isValid).To(gomega.BeTrue(), "received invalid topology: %v", nodeTopology) + }) + + ginkgo.It("it should not account for any cpus if a container doesn't request exclusive cpus (best effort QOS)", func() { + ginkgo.By("getting the initial topology information") + initialNodeTopo := getNodeTopology(topologyClient, topologyUpdaterNode.Name, namespace) + ginkgo.By("creating a pod consuming resources from the shared, non-exclusive CPU pool (best-effort QoS)") + sleeperPod := testutils.BestEffortSleeperPod() + + podMap := make(map[string]*v1.Pod) + pod := f.PodClient().CreateSync(sleeperPod) + podMap[pod.Name] = pod + defer testutils.DeletePodsAsync(f, podMap) + + cooldown := 30 * time.Second + ginkgo.By(fmt.Sprintf("getting the updated topology - sleeping for %v", cooldown)) + // the object, hance the resource version must NOT change, so we can only sleep + time.Sleep(cooldown) + ginkgo.By("checking the changes in the updated topology - expecting none") + finalNodeTopo := getNodeTopology(topologyClient, topologyUpdaterNode.Name, namespace) + + initialAllocRes := allocatableResourceListFromNodeResourceTopology(initialNodeTopo) + finalAllocRes := allocatableResourceListFromNodeResourceTopology(finalNodeTopo) + if len(initialAllocRes) == 0 || len(finalAllocRes) == 0 { + ginkgo.Fail(fmt.Sprintf("failed to find allocatable resources from node topology initial=%v final=%v", initialAllocRes, finalAllocRes)) + } + zoneName, resName, cmp, ok := cmpAllocatableResources(initialAllocRes, finalAllocRes) + framework.Logf("zone=%q resource=%q cmp=%v ok=%v", zoneName, resName, cmp, ok) + if !ok { + ginkgo.Fail(fmt.Sprintf("failed to compare allocatable resources from node topology initial=%v final=%v", initialAllocRes, finalAllocRes)) + } + + // This is actually a workaround. + // Depending on the (random, by design) order on which ginkgo runs the tests, a test which exclusively allocates CPUs may run before. + // We cannot (nor should) care about what runs before this test, but we know that this may happen. + // The proper solution is to wait for ALL the container requesting exclusive resources to be gone before to end the related test. + // To date, we don't yet have a clean way to wait for these pod (actually containers) to be completely gone + // (hence, releasing the exclusively allocated CPUs) before to end the test, so this test can run with some leftovers hanging around, + // which makes the accounting harder. And this is what we handle here. 
+ isGreaterEqual := (cmp >= 0) + gomega.Expect(isGreaterEqual).To(gomega.BeTrue(), fmt.Sprintf("final allocatable resources not restored - cmp=%d initial=%v final=%v", cmp, initialAllocRes, finalAllocRes)) + }) + + ginkgo.It("it should not account for any cpus if a container doesn't request exclusive cpus (guaranteed QOS, nonintegral cpu request)", func() { + ginkgo.By("getting the initial topology information") + initialNodeTopo := getNodeTopology(topologyClient, topologyUpdaterNode.Name, namespace) + ginkgo.By("creating a pod consuming resources from the shared, non-exclusive CPU pool (guaranteed QoS, nonintegral request)") + sleeperPod := testutils.GuaranteedSleeperPod("500m") + + podMap := make(map[string]*v1.Pod) + pod := f.PodClient().CreateSync(sleeperPod) + podMap[pod.Name] = pod + defer testutils.DeletePodsAsync(f, podMap) + + cooldown := 30 * time.Second + ginkgo.By(fmt.Sprintf("getting the updated topology - sleeping for %v", cooldown)) + // the object, hance the resource version must NOT change, so we can only sleep + time.Sleep(cooldown) + ginkgo.By("checking the changes in the updated topology - expecting none") + finalNodeTopo := getNodeTopology(topologyClient, topologyUpdaterNode.Name, namespace) + + initialAllocRes := allocatableResourceListFromNodeResourceTopology(initialNodeTopo) + finalAllocRes := allocatableResourceListFromNodeResourceTopology(finalNodeTopo) + if len(initialAllocRes) == 0 || len(finalAllocRes) == 0 { + ginkgo.Fail(fmt.Sprintf("failed to find allocatable resources from node topology initial=%v final=%v", initialAllocRes, finalAllocRes)) + } + zoneName, resName, cmp, ok := cmpAllocatableResources(initialAllocRes, finalAllocRes) + framework.Logf("zone=%q resource=%q cmp=%v ok=%v", zoneName, resName, cmp, ok) + if !ok { + ginkgo.Fail(fmt.Sprintf("failed to compare allocatable resources from node topology initial=%v final=%v", initialAllocRes, finalAllocRes)) + } + + // This is actually a workaround. + // Depending on the (random, by design) order on which ginkgo runs the tests, a test which exclusively allocates CPUs may run before. + // We cannot (nor should) care about what runs before this test, but we know that this may happen. + // The proper solution is to wait for ALL the container requesting exclusive resources to be gone before to end the related test. + // To date, we don't yet have a clean way to wait for these pod (actually containers) to be completely gone + // (hence, releasing the exclusively allocated CPUs) before to end the test, so this test can run with some leftovers hanging around, + // which makes the accounting harder. And this is what we handle here. 
+ isGreaterEqual := (cmp >= 0) + gomega.Expect(isGreaterEqual).To(gomega.BeTrue(), fmt.Sprintf("final allocatable resources not restored - cmp=%d initial=%v final=%v", cmp, initialAllocRes, finalAllocRes)) + }) + + ginkgo.It("it should account for containers requesting exclusive cpus", func() { + nodes, err := testutils.FilterNodesWithEnoughCores(workerNodes, "1000m") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + if len(nodes) < 1 { + ginkgo.Skip("not enough allocatable cores for this test") + } + + ginkgo.By("getting the initial topology information") + initialNodeTopo := getNodeTopology(topologyClient, topologyUpdaterNode.Name, namespace) + ginkgo.By("creating a pod consuming exclusive CPUs") + sleeperPod := testutils.GuaranteedSleeperPod("1000m") + + podMap := make(map[string]*v1.Pod) + pod := f.PodClient().CreateSync(sleeperPod) + podMap[pod.Name] = pod + defer testutils.DeletePodsAsync(f, podMap) + + ginkgo.By("getting the updated topology") + var finalNodeTopo *v1alpha1.NodeResourceTopology + gomega.Eventually(func() bool { + finalNodeTopo, err = topologyClient.TopologyV1alpha1().NodeResourceTopologies(namespace).Get(context.TODO(), topologyUpdaterNode.Name, metav1.GetOptions{}) + if err != nil { + framework.Logf("failed to get the node topology resource: %v", err) + return false + } + return finalNodeTopo.ObjectMeta.ResourceVersion != initialNodeTopo.ObjectMeta.ResourceVersion + }, time.Minute, 5*time.Second).Should(gomega.BeTrue(), "didn't get updated node topology info") + ginkgo.By("checking the changes in the updated topology") + + initialAllocRes := allocatableResourceListFromNodeResourceTopology(initialNodeTopo) + finalAllocRes := allocatableResourceListFromNodeResourceTopology(finalNodeTopo) + if len(initialAllocRes) == 0 || len(finalAllocRes) == 0 { + ginkgo.Fail(fmt.Sprintf("failed to find allocatable resources from node topology initial=%v final=%v", initialAllocRes, finalAllocRes)) + } + zoneName, resName, isLess := lessAllocatableResources(initialAllocRes, finalAllocRes) + framework.Logf("zone=%q resource=%q isLess=%v", zoneName, resName, isLess) + gomega.Expect(isLess).To(gomega.BeTrue(), fmt.Sprintf("final allocatable resources not decreased - initial=%v final=%v", initialAllocRes, finalAllocRes)) + }) + + }) + + ginkgo.JustAfterEach(func() { + err := testutils.DeconfigureRBAC(f.ClientSet, f.Namespace.Name) + if err != nil { + framework.Logf("failed to delete RBAC resources: %v", err) + } + + err = extClient.ApiextensionsV1().CustomResourceDefinitions().Delete(context.TODO(), crd.Name, metav1.DeleteOptions{}) + if err != nil { + framework.Logf("failed to delete node resources topologies CRD: %v", err) + } + }) +}) + +const nodeResourceTopologiesName = "noderesourcetopologies.topology.node.k8s.io" + +func newNodeResourceTopologies() *apiextensionsv1.CustomResourceDefinition { + return &apiextensionsv1.CustomResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeResourceTopologiesName, + Annotations: map[string]string{ + "api-approved.kubernetes.io": "https://github.com/kubernetes/enhancements/pull/1870", + }, + }, + Spec: apiextensionsv1.CustomResourceDefinitionSpec{ + Group: "topology.node.k8s.io", + Names: apiextensionsv1.CustomResourceDefinitionNames{ + Plural: "noderesourcetopologies", + Singular: "noderesourcetopology", + ShortNames: []string{ + "node-res-topo", + }, + Kind: "NodeResourceTopology", + }, + Scope: "Namespaced", + Versions: []apiextensionsv1.CustomResourceDefinitionVersion{ + { + Name: "v1alpha1", + Schema: 
&apiextensionsv1.CustomResourceValidation{ + OpenAPIV3Schema: &apiextensionsv1.JSONSchemaProps{ + Type: "object", + Properties: map[string]apiextensionsv1.JSONSchemaProps{ + "topologyPolicies": { + Type: "array", + Items: &apiextensionsv1.JSONSchemaPropsOrArray{ + Schema: &apiextensionsv1.JSONSchemaProps{ + Type: "string", + }, + }, + }, + }, + }, + }, + Served: true, + Storage: true, + }, + }, + }, + } +} + +func CreateNodeResourceTopologies(extClient extclient.Interface) (*apiextensionsv1.CustomResourceDefinition, error) { + crd, err := extClient.ApiextensionsV1().CustomResourceDefinitions().Get(context.TODO(), nodeResourceTopologiesName, metav1.GetOptions{}) + if err != nil && !errors.IsNotFound(err) { + return nil, err + } + + if err == nil { + return crd, nil + } + + crd, err = extClient.ApiextensionsV1().CustomResourceDefinitions().Create(context.TODO(), newNodeResourceTopologies(), metav1.CreateOptions{}) + if err != nil { + return nil, err + } + + return crd, nil +} + +func getNodeTopology(topologyClient *topologyclientset.Clientset, nodeName, namespace string) *v1alpha1.NodeResourceTopology { + var nodeTopology *v1alpha1.NodeResourceTopology + var err error + gomega.EventuallyWithOffset(1, func() bool { + nodeTopology, err = topologyClient.TopologyV1alpha1().NodeResourceTopologies(namespace).Get(context.TODO(), nodeName, metav1.GetOptions{}) + if err != nil { + framework.Logf("failed to get the node topology resource: %v", err) + return false + } + return true + }, time.Minute, 5*time.Second).Should(gomega.BeTrue()) + return nodeTopology +} + +func isValidNodeTopology(nodeTopology *v1alpha1.NodeResourceTopology, kubeletConfig *kubeletconfig.KubeletConfiguration) bool { + if nodeTopology == nil || len(nodeTopology.TopologyPolicies) == 0 { + framework.Logf("failed to get topology policy from the node topology resource") + return false + } + + if nodeTopology.TopologyPolicies[0] != (*kubeletConfig).TopologyManagerPolicy { + return false + } + + if nodeTopology.Zones == nil || len(nodeTopology.Zones) == 0 { + framework.Logf("failed to get topology zones from the node topology resource") + return false + } + + foundNodes := 0 + for _, zone := range nodeTopology.Zones { + // TODO constant not in the APIs + if !strings.HasPrefix(strings.ToUpper(zone.Type), "NODE") { + continue + } + foundNodes++ + + if !isValidCostList(zone.Name, zone.Costs) { + return false + } + + if !isValidResourceList(zone.Name, zone.Resources) { + return false + } + } + return foundNodes > 0 +} + +func isValidCostList(zoneName string, costs v1alpha1.CostList) bool { + if len(costs) == 0 { + framework.Logf("failed to get topology costs for zone %q from the node topology resource", zoneName) + return false + } + + // TODO cross-validate zone names + for _, cost := range costs { + if cost.Name == "" || cost.Value < 0 { + framework.Logf("malformed cost %v for zone %q", cost, zoneName) + } + } + return true +} + +func isValidResourceList(zoneName string, resources v1alpha1.ResourceInfoList) bool { + if len(resources) == 0 { + framework.Logf("failed to get topology resources for zone %q from the node topology resource", zoneName) + return false + } + foundCpu := false + for _, resource := range resources { + // TODO constant not in the APIs + if strings.ToUpper(resource.Name) == "CPU" { + foundCpu = true + } + allocatable, ok1 := resource.Allocatable.AsInt64() + capacity, ok2 := resource.Capacity.AsInt64() + if (!ok1 || !ok2) || ((allocatable < 0 || capacity < 0) || (capacity < allocatable)) { + framework.Logf("malformed 
resource %v for zone %q", resource, zoneName) + return false + } + } + return foundCpu +} + +func allocatableResourceListFromNodeResourceTopology(nodeTopo *v1alpha1.NodeResourceTopology) map[string]v1.ResourceList { + allocRes := make(map[string]v1.ResourceList) + for _, zone := range nodeTopo.Zones { + if zone.Type != "Node" { + continue + } + resList := make(v1.ResourceList) + for _, res := range zone.Resources { + resList[v1.ResourceName(res.Name)] = res.Allocatable.DeepCopy() + } + if len(resList) == 0 { + continue + } + allocRes[zone.Name] = resList + } + return allocRes +} + +func lessAllocatableResources(expected, got map[string]v1.ResourceList) (string, string, bool) { + zoneName, resName, cmp, ok := cmpAllocatableResources(expected, got) + if !ok { + framework.Logf("-> cmp failed (not ok)") + return "", "", false + } + if cmp < 0 { + return zoneName, resName, true + } + framework.Logf("-> cmp failed (value=%d)", cmp) + return "", "", false +} + +func cmpAllocatableResources(expected, got map[string]v1.ResourceList) (string, string, int, bool) { + if len(got) != len(expected) { + framework.Logf("-> expected=%v (len=%d) got=%v (len=%d)", expected, len(expected), got, len(got)) + return "", "", 0, false + } + for expZoneName, expResList := range expected { + gotResList, ok := got[expZoneName] + if !ok { + return expZoneName, "", 0, false + } + if resName, cmp, ok := cmpResourceList(expResList, gotResList); !ok || cmp != 0 { + return expZoneName, resName, cmp, ok + } + } + return "", "", 0, true +} + +func cmpResourceList(expected, got v1.ResourceList) (string, int, bool) { + if len(got) != len(expected) { + framework.Logf("-> expected=%v (len=%d) got=%v (len=%d)", expected, len(expected), got, len(got)) + return "", 0, false + } + for expResName, expResQty := range expected { + gotResQty, ok := got[expResName] + if !ok { + return string(expResName), 0, false + } + if cmp := gotResQty.Cmp(expResQty); cmp != 0 { + framework.Logf("-> resource=%q cmp=%d expected=%v got=%v", expResName, cmp, expResQty, gotResQty) + return string(expResName), cmp, true + } + } + return "", 0, true +} diff --git a/test/e2e/utils/node.go b/test/e2e/utils/node.go new file mode 100644 index 0000000000..a313b43bd0 --- /dev/null +++ b/test/e2e/utils/node.go @@ -0,0 +1,87 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package utils + +import ( + "context" + "fmt" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/kubernetes/test/e2e/framework" +) + +const ( + // RoleWorker contains the worker role + RoleWorker = "worker" +) + +const ( + // LabelRole contains the key for the role label + LabelRole = "node-role.kubernetes.io" + // LabelHostname contains the key for the hostname label + LabelHostname = "kubernetes.io/hostname" +) + +// GetWorkerNodes returns all nodes labeled as worker +func GetWorkerNodes(f *framework.Framework) ([]v1.Node, error) { + return GetNodesByRole(f, RoleWorker) +} + +// GetByRole returns all nodes with the specified role +func GetNodesByRole(f *framework.Framework, role string) ([]v1.Node, error) { + selector, err := labels.Parse(fmt.Sprintf("%s/%s=", LabelRole, role)) + if err != nil { + return nil, err + } + return GetNodesBySelector(f, selector) +} + +// GetBySelector returns all nodes with the specified selector +func GetNodesBySelector(f *framework.Framework, selector labels.Selector) ([]v1.Node, error) { + nodes, err := f.ClientSet.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{LabelSelector: selector.String()}) + if err != nil { + return nil, err + } + return nodes.Items, nil +} + +// FilterNodesWithEnoughCores returns all nodes with at least the amount of given CPU allocatable +func FilterNodesWithEnoughCores(nodes []v1.Node, cpuAmount string) ([]v1.Node, error) { + requestCpu := resource.MustParse(cpuAmount) + framework.Logf("checking request %v on %d nodes", requestCpu, len(nodes)) + + resNodes := []v1.Node{} + for _, node := range nodes { + availCpu, ok := node.Status.Allocatable[v1.ResourceCPU] + if !ok || availCpu.IsZero() { + return nil, fmt.Errorf("node %q has no allocatable CPU", node.Name) + } + + if availCpu.Cmp(requestCpu) < 1 { + framework.Logf("node %q available cpu %v requested cpu %v", node.Name, availCpu, requestCpu) + continue + } + + framework.Logf("node %q has enough resources, cluster OK", node.Name) + resNodes = append(resNodes, node) + } + + return resNodes, nil +} diff --git a/test/e2e/utils/pod.go b/test/e2e/utils/pod.go index cd17fba0d6..aa64877317 100644 --- a/test/e2e/utils/pod.go +++ b/test/e2e/utils/pod.go @@ -18,15 +18,93 @@ package utils import ( "flag" + "sync" + + "github.com/onsi/ginkgo" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/uuid" + "k8s.io/kubernetes/test/e2e/framework" + "k8s.io/utils/pointer" ) var pullIfNotPresent = flag.Bool("nfd.pull-if-not-present", false, "Pull Images if not present - not always") +const ( + CentosImage = "quay.io/centos/centos:8" +) + +func GuaranteedSleeperPod(cpuLimit string) *v1.Pod { + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "sleeper-gu-pod", + }, + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyNever, + Containers: []v1.Container{ + v1.Container{ + Name: "sleeper-gu-cnt", + Image: CentosImage, + // 1 hour (or >= 1h in general) is "forever" for our purposes + Command: []string{"/bin/sleep", "1h"}, + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{ + // we use 1 core because that's the minimal meaningful quantity + v1.ResourceName(v1.ResourceCPU): resource.MustParse(cpuLimit), + // any random reasonable amount is fine + v1.ResourceName(v1.ResourceMemory): resource.MustParse("100Mi"), + }, + }, + }, + }, + }, + } +} + 
+func BestEffortSleeperPod() *v1.Pod { + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "sleeper-be-pod", + }, + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyNever, + Containers: []v1.Container{ + v1.Container{ + Name: "sleeper-be-cnt", + Image: CentosImage, + // 1 hour (or >= 1h in general) is "forever" for our purposes + Command: []string{"/bin/sleep", "1h"}, + }, + }, + }, + } +} + +func DeletePodsAsync(f *framework.Framework, podMap map[string]*v1.Pod) { + var wg sync.WaitGroup + for _, pod := range podMap { + wg.Add(1) + go func(podNS, podName string) { + defer ginkgo.GinkgoRecover() + defer wg.Done() + + DeletePodSyncByName(f, podName) + }(pod.Namespace, pod.Name) + } + wg.Wait() +} + +func DeletePodSyncByName(f *framework.Framework, podName string) { + gp := int64(0) + delOpts := metav1.DeleteOptions{ + GracePeriodSeconds: &gp, + } + f.PodClient().DeleteSync(podName, delOpts, framework.DefaultPodDeletionTimeout) +} + // NFDMasterPod provide NFD master pod definition func NFDMasterPod(image string, onMasterNode bool) *v1.Pod { p := &v1.Pod{ @@ -91,6 +169,12 @@ func NFDWorkerDaemonSet(image string, extraArgs []string) *appsv1.DaemonSet { return newDaemonSet("nfd-worker", podSpec) } +// NFDTopologyUpdaterDaemonSet provides the NFD daemon set topology updater +func NFDTopologyUpdaterDaemonSet(image string, extraArgs []string) *appsv1.DaemonSet { + podSpec := nfdTopologyUpdaterPodSpec(image, extraArgs) + return newDaemonSet("nfd-topology-updater", podSpec) +} + // newDaemonSet provide the new daemon set func newDaemonSet(name string, podSpec *v1.PodSpec) *appsv1.DaemonSet { return &appsv1.DaemonSet{ @@ -182,7 +266,89 @@ func nfdWorkerPodSpec(image string, extraArgs []string) *v1.PodSpec { }, }, } +} +func nfdTopologyUpdaterPodSpec(image string, extraArgs []string) *v1.PodSpec { + return &v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "node-topology-updater", + Image: image, + ImagePullPolicy: pullPolicy(), + Command: []string{"nfd-topology-updater"}, + Args: append([]string{ + "--kubelet-config-file=/podresources/config.yaml", + "--podresources-socket=unix:///podresources/kubelet.sock", + "--sleep-interval=3s", + "--watch-namespace=rte", + "--server=nfd-master-e2e:8080", + }, extraArgs...), + Env: []v1.EnvVar{ + { + Name: "NODE_NAME", + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + FieldPath: "spec.nodeName", + }, + }, + }, + }, + SecurityContext: &v1.SecurityContext{ + Capabilities: &v1.Capabilities{ + Drop: []v1.Capability{"ALL"}, + }, + RunAsUser: pointer.Int64Ptr(0), + ReadOnlyRootFilesystem: pointer.BoolPtr(true), + AllowPrivilegeEscalation: pointer.BoolPtr(false), + }, + VolumeMounts: []v1.VolumeMount{ + { + Name: "kubelet-podresources-conf", + MountPath: "/podresources/config.yaml", + }, + { + Name: "kubelet-podresources-sock", + MountPath: "/podresources/kubelet.sock", + }, + { + Name: "host-sys", + MountPath: "/host/sys", + }, + }, + }, + }, + ServiceAccountName: "nfd-master-e2e", + DNSPolicy: v1.DNSClusterFirstWithHostNet, + Volumes: []v1.Volume{ + { + Name: "kubelet-podresources-conf", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/var/lib/kubelet/config.yaml", + Type: newHostPathType(v1.HostPathFile), + }, + }, + }, + { + Name: "kubelet-podresources-sock", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/var/lib/kubelet/pod-resources/kubelet.sock", + Type: newHostPathType(v1.HostPathSocket), + }, + }, + }, + { + Name: "host-sys", + VolumeSource: v1.VolumeSource{ + 
HostPath: &v1.HostPathVolumeSource{ + Path: "/sys", + Type: newHostPathType(v1.HostPathDirectory), + }, + }, + }, + }, + } } func newHostPathType(typ v1.HostPathType) *v1.HostPathType { From b342ef5e8b497b50b72f6c3205eda6117e2262e8 Mon Sep 17 00:00:00 2001 From: Talor Itzhak Date: Wed, 6 Oct 2021 17:37:11 +0300 Subject: [PATCH 4/4] Get Topology Manager policy from configz-endpoint This patch allows nfd-master to pull the config from the Kubernetes configz endpoint. This update introduces --obtain-kubelet-config=