From 182601bdf741afc7df2e754bb67ac7e45cc38ed9 Mon Sep 17 00:00:00 2001
From: Ed Bartosh
Date: Thu, 9 Apr 2020 10:51:23 +0300
Subject: [PATCH 1/2] run fpga plugin in arbitrary namespace

---
 deployments/fpga_plugin/fpga_plugin.yaml      |  2 +-
 .../fpga_plugin_service_account.yaml          |  4 +-
 scripts/deploy-fpgaplugin.sh                  | 88 +++++++++++++++++++
 3 files changed, 91 insertions(+), 3 deletions(-)
 create mode 100755 scripts/deploy-fpgaplugin.sh

diff --git a/deployments/fpga_plugin/fpga_plugin.yaml b/deployments/fpga_plugin/fpga_plugin.yaml
index 9de04246c..a3d1b0f7f 100644
--- a/deployments/fpga_plugin/fpga_plugin.yaml
+++ b/deployments/fpga_plugin/fpga_plugin.yaml
@@ -2,7 +2,7 @@ apiVersion: apps/v1
 kind: DaemonSet
 metadata:
   name: intel-fpga-plugin
-  namespace: kube-system
+  namespace: {namespace}
   labels:
     app: intel-fpga-plugin
 spec:
diff --git a/deployments/fpga_plugin/fpga_plugin_service_account.yaml b/deployments/fpga_plugin/fpga_plugin_service_account.yaml
index 261d8beca..23cf80635 100644
--- a/deployments/fpga_plugin/fpga_plugin_service_account.yaml
+++ b/deployments/fpga_plugin/fpga_plugin_service_account.yaml
@@ -2,7 +2,7 @@ apiVersion: v1
 kind: ServiceAccount
 metadata:
   name: intel-fpga-plugin-controller
-  namespace: kube-system
+  namespace: {namespace}
 ---
 kind: ClusterRole
 apiVersion: rbac.authorization.k8s.io/v1
@@ -21,7 +21,7 @@ metadata:
 subjects:
 - kind: ServiceAccount
   name: intel-fpga-plugin-controller
-  namespace: kube-system
+  namespace: {namespace}
 roleRef:
   kind: ClusterRole
   name: node-getter
diff --git a/scripts/deploy-fpgaplugin.sh b/scripts/deploy-fpgaplugin.sh
new file mode 100755
index 000000000..6bd0820e3
--- /dev/null
+++ b/scripts/deploy-fpgaplugin.sh
@@ -0,0 +1,88 @@
+#!/bin/sh -eu
+#
+# Deploy the Intel FPGA device plugin into the given namespace, or remove a
+# previously created deployment when the "cleanup" command is given.
+# The "{namespace}" placeholder in the deployment YAML files is replaced with
+# the value of --namespace before the files are passed to kubectl.
+
+srcroot="$(realpath "$(dirname "$0")/..")"
+kubectl='kubectl'
+namespace='default'
+mode='af'
+command=''
+
+# Print the usage message. $1 is the script name to show.
+help() {
+    echo "Usage: $1 [help|cleanup]"
+    echo ' Command "help" prints this message'
+    echo ' Command "cleanup" removes FPGA plugin deployment'
+    echo ''
+    echo ' If no command is given the script will deploy the plugin'
+    echo ''
+    echo ' Options:'
+    echo ' --kubectl - path to the kubectl utility'
+    echo ' --mode - "af" (default) or "region" mode of operation'
+    echo ' --namespace - namespace to deploy the plugin in'
+}
+
+# Print deployment file $1 with the {namespace} placeholder substituted.
+render() {
+    sed -e "s/{namespace}/${namespace}/g" "${srcroot}/deployments/fpga_plugin/$1"
+}
+
+# Parse commands and options. Each option consumes the following argument.
+while [ $# -gt 0 ]; do
+    case ${1} in
+        --kubectl)
+            kubectl="$2"
+            shift
+            ;;
+        --mode)
+            mode="$2"
+            shift
+            ;;
+        --namespace)
+            namespace="$2"
+            shift
+            ;;
+        help)
+            help "$(basename "$0")"
+            exit 0
+            ;;
+        cleanup)
+            command="cleanup"
+            ;;
+        *)
+            echo "Unknown option: ${1}"
+            exit 1
+            ;;
+    esac
+    shift
+done
+
+which ${kubectl} > /dev/null 2>&1 || { echo "ERROR: ${kubectl} not found"; exit 1; }
+
+# Reject an unsupported mode before touching the cluster, so that a typo does
+# not tear down a working deployment. The "cleanup" command ignores the mode.
+if [ "x${command}" != "xcleanup" ] && [ "x${mode}" != "xaf" ] && [ "x${mode}" != "xregion" ]; then
+    echo "ERROR: supported modes are 'af' and 'region'"
+    exit 1
+fi
+
+# Deletion is best-effort: the objects may not exist yet.
+echo "Clean up previously created deployment"
+${kubectl} annotate node --all fpga.intel.com/device-plugin-mode- || true
+render fpga_plugin.yaml | ${kubectl} --namespace "${namespace}" delete -f - || true
+render fpga_plugin_service_account.yaml | ${kubectl} --namespace "${namespace}" delete -f - || true
+
+if [ "x${command}" = "xcleanup" ]; then
+    echo "Cleanup done. Exiting..."
+    exit 0
+fi
+
+echo 'Set default operation mode'
+${kubectl} annotate node --overwrite --all "fpga.intel.com/device-plugin-mode=${mode}" || true
+echo 'Create service account'
+render fpga_plugin_service_account.yaml | ${kubectl} --namespace "${namespace}" create -f - || true
+echo 'Create plugin daemonset'
+render fpga_plugin.yaml | ${kubectl} --namespace "${namespace}" create -f - || true
From 3cb9bd228f85dcbe04bc2d28d4873a993d6f4a9d Mon Sep 17 00:00:00 2001
From: Ed Bartosh
Date: Tue, 7 Apr 2020 17:34:45 +0300
Subject: [PATCH 2/2] implement e2e tests for FPGA plugin

implemented e2e tests for preprogrammed(af) and
orchestrated(region) plugin modes.
---
 test/e2e/deviceplugins_suite_test.go |   1 +
 test/e2e/fpga/fpga.go                | 132 +++++++++++++++++++++++++++
 2 files changed, 133 insertions(+)
 create mode 100644 test/e2e/fpga/fpga.go

diff --git a/test/e2e/deviceplugins_suite_test.go b/test/e2e/deviceplugins_suite_test.go
index 2101e54f1..e4f206981 100644
--- a/test/e2e/deviceplugins_suite_test.go
+++ b/test/e2e/deviceplugins_suite_test.go
@@ -22,6 +22,7 @@ import (
 	"github.com/onsi/ginkgo"
 	"github.com/onsi/gomega"
 
+	_ "github.com/intel/intel-device-plugins-for-kubernetes/test/e2e/fpga"
 	_ "github.com/intel/intel-device-plugins-for-kubernetes/test/e2e/fpgaadmissionwebhook"
 	_ "github.com/intel/intel-device-plugins-for-kubernetes/test/e2e/gpu"
 	_ "github.com/intel/intel-device-plugins-for-kubernetes/test/e2e/qat"
diff --git a/test/e2e/fpga/fpga.go b/test/e2e/fpga/fpga.go
new file mode 100644
index 000000000..ab41561b5
--- /dev/null
+++ b/test/e2e/fpga/fpga.go
@@ -0,0 +1,132 @@
+// Copyright 2020 Intel Corporation. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fpga
+
+import (
+	"fmt"
+	"time"
+
+	"github.com/intel/intel-device-plugins-for-kubernetes/test/e2e/utils"
+	"github.com/onsi/ginkgo"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/kubernetes/test/e2e/framework"
+	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
+)
+
+const (
+	pluginDeployScript  = "scripts/deploy-fpgaplugin.sh"
+	webhookDeployScript = "scripts/webhook-deploy.sh"
+	nlb0NodeResource    = "fpga.intel.com/af-d8424dc4a4a3c413f89e433683f9040b"
+	nlb0PodResource     = "fpga.intel.com/arria10.dcp1.2-nlb0"
+	nlb3PodResource     = "fpga.intel.com/arria10.dcp1.2-nlb3"
+	arria10NodeResource = "fpga.intel.com/region-69528db6eb31577a8c3668f9faa081f6"
+)
+
+func init() {
+	ginkgo.Describe("FPGA Plugin E2E tests", describe)
+}
+
+func describe() {
+	webhookDeployScriptPath, err := utils.LocateRepoFile(webhookDeployScript)
+	if err != nil {
+		framework.Failf("unable to locate %q: %v", webhookDeployScript, err)
+	}
+
+	pluginDeployScriptPath, err := utils.LocateRepoFile(pluginDeployScript)
+	if err != nil {
+		framework.Failf("unable to locate %q: %v", pluginDeployScript, err)
+	}
+
+	fmw := framework.NewDefaultFramework("fpgaplugin-e2e")
+
+	ginkgo.It("Run FPGA plugin tests", func() {
+		// Run region test case twice (nlb3, then nlb0) to ensure that the device is reprogrammed at least once
+		runTestCase(fmw, webhookDeployScriptPath, pluginDeployScriptPath, "region", "orchestrated", arria10NodeResource, nlb3PodResource, "nlb3", "nlb0")
+		runTestCase(fmw, webhookDeployScriptPath, pluginDeployScriptPath, "region", "orchestrated", arria10NodeResource, nlb0PodResource, "nlb0", "nlb3")
+		// Run af test case
+		runTestCase(fmw, webhookDeployScriptPath, pluginDeployScriptPath, "af", "preprogrammed", nlb0NodeResource, nlb0PodResource, "nlb0", "nlb3")
+	})
+}
+
+func runTestCase(fmw *framework.Framework, webhookDeployScriptPath, pluginDeployScriptPath, pluginMode, webhookMode, nodeResource, podResource, cmd1, cmd2 string) {
+	ginkgo.By(fmt.Sprintf("deploying webhook in %s mode", webhookMode))
+	_, _, err := framework.RunCmd(webhookDeployScriptPath, "--mode", webhookMode, "--namespace", fmw.Namespace.Name)
+	framework.ExpectNoError(err)
+
+	waitForPod(fmw, "intel-fpga-webhook")
+
+	ginkgo.By(fmt.Sprintf("deploying FPGA plugin in %s mode", pluginMode))
+	_, _, err = framework.RunCmd(pluginDeployScriptPath, "--mode", pluginMode, "--namespace", fmw.Namespace.Name)
+	framework.ExpectNoError(err)
+
+	waitForPod(fmw, "intel-fpga-plugin")
+
+	resource := v1.ResourceName(nodeResource)
+	ginkgo.By("checking if the resource is allocatable")
+	if err := utils.WaitForNodesWithResource(fmw.ClientSet, resource, 30*time.Second); err != nil {
+		framework.Failf("unable to wait for nodes to have positive allocatable resource: %v", err)
+	}
+
+	resource = v1.ResourceName(podResource)
+	image := "intel/opae-nlb-demo:devel"
+
+	ginkgo.By("submitting a pod requesting correct FPGA resources")
+	pod := createPod(fmw, fmt.Sprintf("fpgaplugin-nlb-%s-%s-%s-correct", pluginMode, cmd1, cmd2), resource, image, []string{cmd1})
+
+	ginkgo.By("waiting the pod to finish successfully")
+	fmw.PodClient().WaitForSuccess(pod.ObjectMeta.Name, 60*time.Second)
+	// If WaitForSuccess fails, ginkgo doesn't show the logs of the failed container.
+	// To see them while debugging, replace WaitForSuccess above with WaitForFinish
+	// followed by 'kubectl logs', and return early:
+	// fmw.PodClient().WaitForFinish(pod.ObjectMeta.Name, 60*time.Second)
+	// framework.RunKubectlOrDie("--namespace", fmw.Namespace.Name, "logs", pod.ObjectMeta.Name)
+
+	ginkgo.By("submitting a pod requesting incorrect FPGA resources")
+	pod = createPod(fmw, fmt.Sprintf("fpgaplugin-nlb-%s-%s-%s-incorrect", pluginMode, cmd1, cmd2), resource, image, []string{cmd2})
+
+	ginkgo.By("waiting the pod failure")
+	fmw.PodClient().WaitForFailure(pod.ObjectMeta.Name, 60*time.Second)
+}
+
+func createPod(fmw *framework.Framework, name string, resourceName v1.ResourceName, image string, command []string) *v1.Pod {
+	resourceList := v1.ResourceList{resourceName: resource.MustParse("1"),
+		"cpu":           resource.MustParse("1"),
+		"hugepages-2Mi": resource.MustParse("20Mi")}
+	podSpec := fmw.NewTestPod(name, resourceList, resourceList)
+	podSpec.Spec.RestartPolicy = v1.RestartPolicyNever
+	podSpec.Spec.Containers[0].Image = image
+	podSpec.Spec.Containers[0].Command = command
+	podSpec.Spec.Containers[0].SecurityContext = &v1.SecurityContext{
+		Capabilities: &v1.Capabilities{
+			Add: []v1.Capability{"IPC_LOCK"},
+		},
+	}
+
+	pod, err := fmw.ClientSet.CoreV1().Pods(fmw.Namespace.Name).Create(podSpec)
+	framework.ExpectNoError(err, "pod Create API error")
+	return pod
+}
+
+func waitForPod(fmw *framework.Framework, name string) {
+	ginkgo.By(fmt.Sprintf("waiting for %s availability", name))
+	if _, err := e2epod.WaitForPodsWithLabelRunningReady(fmw.ClientSet, fmw.Namespace.Name,
+		labels.Set{"app": name}.AsSelector(), 1, 10*time.Second); err != nil {
+		framework.DumpAllNamespaceInfo(fmw.ClientSet, fmw.Namespace.Name)
+		framework.LogFailedContainers(fmw.ClientSet, fmw.Namespace.Name, framework.Logf)
+		framework.Failf("unable to wait for all pods to be running and ready: %v", err)
+	}
+}