Skip to content

implement e2e tests for FPGA plugin #359

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion deployments/fpga_plugin/fpga_plugin.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: apps/v1
kind: DaemonSet
metadata:
name: intel-fpga-plugin
namespace: kube-system
namespace: {namespace}
labels:
app: intel-fpga-plugin
spec:
Expand Down
4 changes: 2 additions & 2 deletions deployments/fpga_plugin/fpga_plugin_service_account.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: v1
kind: ServiceAccount
metadata:
name: intel-fpga-plugin-controller
namespace: kube-system
namespace: {namespace}
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
Expand All @@ -21,7 +21,7 @@ metadata:
subjects:
- kind: ServiceAccount
name: intel-fpga-plugin-controller
namespace: kube-system
namespace: {namespace}
roleRef:
kind: ClusterRole
name: node-getter
Expand Down
73 changes: 73 additions & 0 deletions scripts/deploy-fpgaplugin.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/bin/sh -eu
# Deploys the Intel FPGA device plugin, or removes a previous deployment,
# using the manifests under deployments/fpga_plugin.

# Repository root, resolved relative to this script's location. The nested
# expansions are quoted so that a checkout path containing spaces or glob
# characters is passed to dirname/realpath as a single argument.
srcroot="$(realpath "$(dirname "$0")/..")"

# Defaults; overridden by --kubectl, --namespace and --mode respectively.
kubectl='kubectl'
namespace='default'
mode='af'
# Set to "cleanup" when the cleanup command is given; empty means "deploy".
command=''

# help prints the usage message for this script to stdout.
#   $1 - program name shown in the "Usage:" line
help() {
    echo "Usage: $1 <options> [help|cleanup]"
    echo ' Command "help" prints this message'
    # This script manages the FPGA plugin, not the admission webhook.
    echo ' Command "cleanup" removes FPGA plugin deployment'
    echo ''
    echo ' If no command is given the script will deploy the FPGA plugin'
    echo ''
    echo ' Options:'
    echo ' --kubectl <kubectl> - path to the kubectl utility'
    echo ' --mode <mode> - "af" (default) or "region" mode of operation'
    echo ' --namespace <name> - namespace to deploy the plugin in'
}

# Parse command-line options and commands.
# Each option consumes the following argument as its value (hence the extra
# 'shift' in those branches); "help" and "cleanup" are bare commands.
while [ $# -gt 0 ]; do
    case "${1}" in
        --kubectl)
            # Report a missing value explicitly instead of relying on the
            # shell's unset-parameter / shift failure.
            [ $# -ge 2 ] || { echo "ERROR: option ${1} requires an argument"; exit 1; }
            kubectl="$2"
            shift
            ;;
        --mode)
            [ $# -ge 2 ] || { echo "ERROR: option ${1} requires an argument"; exit 1; }
            mode="$2"
            shift
            ;;
        --namespace)
            [ $# -ge 2 ] || { echo "ERROR: option ${1} requires an argument"; exit 1; }
            namespace="$2"
            shift
            ;;
        help)
            help "$(basename "$0")"
            exit 0
            ;;
        cleanup)
            command="cleanup"
            ;;
        *)
            echo "Unknown option: ${1}"
            exit 1
            ;;
    esac
    shift
done

# ${kubectl} is left unquoted throughout, exactly as before, so that existing
# invocations keep word-splitting the same way.
which ${kubectl} > /dev/null 2>&1 || { echo "ERROR: ${kubectl} not found"; exit 1; }

# Validate the requested mode before touching the cluster, so that a typo in
# --mode does not delete a working deployment and then fail. The "cleanup"
# command never uses the mode, so it is exempt from the check.
if [ "x${command}" != "xcleanup" ]; then
    if [ "x${mode}" != "xaf" ] && [ "x${mode}" != "xregion" ]; then
        echo "ERROR: supported modes are 'af' and 'region'"
        exit 1
    fi
fi

echo "Clean up previously created deployment"
# Removal is best-effort ('|| true'): nothing may have been deployed yet.
${kubectl} annotate node --all fpga.intel.com/device-plugin-mode- || true
sed -e "s/{namespace}/${namespace}/g" "${srcroot}/deployments/fpga_plugin/fpga_plugin.yaml" | ${kubectl} --namespace "${namespace}" delete -f - || true
sed -e "s/{namespace}/${namespace}/g" "${srcroot}/deployments/fpga_plugin/fpga_plugin_service_account.yaml" | ${kubectl} --namespace "${namespace}" delete -f - || true

if [ "x${command}" = "xcleanup" ]; then
    echo "Cleanup done. Exiting..."
    exit 0
fi

echo 'Set default operation mode'
${kubectl} annotate node --overwrite --all fpga.intel.com/device-plugin-mode=${mode} || true
echo 'Create service account'
# The manifests carry a literal "{namespace}" placeholder that is substituted
# with the target namespace before being piped to kubectl.
sed -e "s/{namespace}/${namespace}/g" "${srcroot}/deployments/fpga_plugin/fpga_plugin_service_account.yaml" | ${kubectl} --namespace "${namespace}" create -f - || true
echo 'Create plugin daemonset'
sed -e "s/{namespace}/${namespace}/g" "${srcroot}/deployments/fpga_plugin/fpga_plugin.yaml" | ${kubectl} --namespace "${namespace}" create -f - || true
1 change: 1 addition & 0 deletions test/e2e/deviceplugins_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/onsi/ginkgo"
"github.com/onsi/gomega"

_ "github.com/intel/intel-device-plugins-for-kubernetes/test/e2e/fpga"
_ "github.com/intel/intel-device-plugins-for-kubernetes/test/e2e/fpgaadmissionwebhook"
_ "github.com/intel/intel-device-plugins-for-kubernetes/test/e2e/gpu"
_ "github.com/intel/intel-device-plugins-for-kubernetes/test/e2e/qat"
Expand Down
132 changes: 132 additions & 0 deletions test/e2e/fpga/fpga.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Copyright 2020 Intel Corporation. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package fpga

import (
"fmt"
"time"

"github.com/intel/intel-device-plugins-for-kubernetes/test/e2e/utils"
"github.com/onsi/ginkgo"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/test/e2e/framework"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
)

const (
// Repo-relative paths of the deployment scripts; describe resolves them
// with utils.LocateRepoFile before running any test case.
pluginDeployScript = "scripts/deploy-fpgaplugin.sh"
webhookDeployScript = "scripts/webhook-deploy.sh"
// Node resource that the "af" mode test case waits to become allocatable.
nlb0NodeResource = "fpga.intel.com/af-d8424dc4a4a3c413f89e433683f9040b"
// Resource names requested by the test pods.
nlb0PodResource = "fpga.intel.com/arria10.dcp1.2-nlb0"
nlb3PodResource = "fpga.intel.com/arria10.dcp1.2-nlb3"
// Node resource that the "region" mode test cases wait to become allocatable.
arria10NodeResource = "fpga.intel.com/region-69528db6eb31577a8c3668f9faa081f6"
)

// init registers the FPGA plugin spec container with ginkgo when the package
// is imported.
func init() {
	const suiteText = "FPGA Plugin E2E tests"
	ginkgo.Describe(suiteText, describe)
}

// describe resolves the deployment scripts, creates the test framework and
// registers a single spec that runs the region test case twice followed by
// the af test case.
func describe() {
	// mustLocate resolves a repo-relative script path and fails the suite
	// if the file cannot be found.
	mustLocate := func(relPath string) string {
		absPath, err := utils.LocateRepoFile(relPath)
		if err != nil {
			framework.Failf("unable to locate %q: %v", relPath, err)
		}
		return absPath
	}

	webhookScript := mustLocate(webhookDeployScript)
	pluginScript := mustLocate(pluginDeployScript)

	fmw := framework.NewDefaultFramework("fpgaplugin-e2e")

	// The region case is listed twice with swapped commands so that the
	// device gets reprogrammed at least once; the af case runs last.
	cases := []struct {
		pluginMode, webhookMode   string
		nodeResource, podResource string
		cmd1, cmd2                string
	}{
		{"region", "orchestrated", arria10NodeResource, nlb3PodResource, "nlb3", "nlb0"},
		{"region", "orchestrated", arria10NodeResource, nlb0PodResource, "nlb0", "nlb3"},
		{"af", "preprogrammed", nlb0NodeResource, nlb0PodResource, "nlb0", "nlb3"},
	}

	ginkgo.It("Run FPGA plugin tests", func() {
		for _, tc := range cases {
			runTestCase(fmw, webhookScript, pluginScript, tc.pluginMode, tc.webhookMode,
				tc.nodeResource, tc.podResource, tc.cmd1, tc.cmd2)
		}
	})
}

// runTestCase runs one end-to-end scenario in the framework's test namespace:
// it runs the webhook deploy script with webhookMode and waits for the
// "intel-fpga-webhook" pod, runs the plugin deploy script with pluginMode and
// waits for the "intel-fpga-plugin" pod, waits for nodeResource to be reported
// by the nodes, then creates a pod requesting podResource that runs cmd1 and
// must succeed, and a second pod requesting the same resource that runs cmd2
// and must fail.
func runTestCase(fmw *framework.Framework, webhookDeployScriptPath, pluginDeployScriptPath, pluginMode, webhookMode, nodeResource, podResource, cmd1, cmd2 string) {
ginkgo.By(fmt.Sprintf("deploying webhook in %s mode", webhookMode))
_, _, err := framework.RunCmd(webhookDeployScriptPath, "--mode", webhookMode, "--namespace", fmw.Namespace.Name)
framework.ExpectNoError(err)

waitForPod(fmw, "intel-fpga-webhook")

ginkgo.By(fmt.Sprintf("deploying FPGA plugin in %s mode", pluginMode))
_, _, err = framework.RunCmd(pluginDeployScriptPath, "--mode", pluginMode, "--namespace", fmw.Namespace.Name)
framework.ExpectNoError(err)
Comment on lines +66 to +74
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've not played with FPGA deployments enough to have strong opinion here but I wonder if we should:

  1. deploy a "global webhook" that gets shared between the tests (AFAIK that's the goal with fpga: make admission webhook mode-less #358)
  2. have a custom namespace for the webhook serviceaccount
  3. deploy plugin using the default serviceaccount
  4. the key test case is "Note that the mappings are scoped to the namespaces they were created in and they are applicable to pods created in the corresponding namespaces." isn't that possible in each custom test run where the namespace comes from gingko?

Copy link
Member Author

@bart0sh bart0sh Apr 14, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently webhook and plugin mode must match. The test uses both modes, that's why I implemented it this way. Another point of doing this is that it's a cleaner approach to use ginkgo test namespace for as many components as possible because ginkgo automatically cleans its namespace after the test.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bart0sh I don't like the new script and that's why I was pondering options to drop it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be a trivial change to get rid of this script when plugin kustomization works correctly, i.e. when #318 is done. So, I'd propose to merge this PR now. Otherwise we'll have to wait until #318 is done.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, that's probably fine. Do you have input to #318 to ensure "kustomization works correctly"?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mythi Yes, I've added 2 items to the #318 last week.


waitForPod(fmw, "intel-fpga-plugin")

// Wait (30s timeout) until nodes report the plugin's node-level resource.
resource := v1.ResourceName(nodeResource)
ginkgo.By("checking if the resource is allocatable")
if err := utils.WaitForNodesWithResource(fmw.ClientSet, resource, 30*time.Second); err != nil {
framework.Failf("unable to wait for nodes to have positive allocatable resource: %v", err)
}

// From here on 'resource' is the name the test pods request.
resource = v1.ResourceName(podResource)
image := "intel/opae-nlb-demo:devel"

ginkgo.By("submitting a pod requesting correct FPGA resources")
pod := createPod(fmw, fmt.Sprintf("fpgaplugin-nlb-%s-%s-%s-correct", pluginMode, cmd1, cmd2), resource, image, []string{cmd1})

ginkgo.By("waiting the pod to finish successfully")
fmw.PodClient().WaitForSuccess(pod.ObjectMeta.Name, 60*time.Second)
// If WaitForSuccess fails, ginkgo doesn't show the logs of the failed container.
// Replacing WaitForSuccess with WaitForFinish + 'kubectl logs' would show the logs
// fmw.PodClient().WaitForFinish(pod.ObjectMeta.Name, 60*time.Second)
// framework.RunKubectlOrDie("--namespace", fmw.Namespace.Name, "logs", pod.ObjectMeta.Name)
// return

// The second pod requests the same resource but runs cmd2 instead of cmd1.
ginkgo.By("submitting a pod requesting incorrect FPGA resources")
pod = createPod(fmw, fmt.Sprintf("fpgaplugin-nlb-%s-%s-%s-incorrect", pluginMode, cmd1, cmd2), resource, image, []string{cmd2})

ginkgo.By("waiting the pod failure")
fmw.PodClient().WaitForFailure(pod.ObjectMeta.Name, 60*time.Second)
}

// createPod builds a test pod called name that runs command in image and
// creates it in the framework's namespace. The pod uses the same list for
// requests and limits: one unit of resourceName, one CPU and 20Mi of 2Mi
// hugepages. It never restarts and its container gets the IPC_LOCK
// capability. The created pod is returned; an API error fails the test.
func createPod(fmw *framework.Framework, name string, resourceName v1.ResourceName, image string, command []string) *v1.Pod {
	resources := make(v1.ResourceList, 3)
	resources[resourceName] = resource.MustParse("1")
	resources["cpu"] = resource.MustParse("1")
	resources["hugepages-2Mi"] = resource.MustParse("20Mi")

	// The same list serves as both requests and limits.
	spec := fmw.NewTestPod(name, resources, resources)
	spec.Spec.RestartPolicy = v1.RestartPolicyNever

	container := &spec.Spec.Containers[0]
	container.Image = image
	container.Command = command
	container.SecurityContext = &v1.SecurityContext{
		Capabilities: &v1.Capabilities{
			Add: []v1.Capability{"IPC_LOCK"},
		},
	}

	created, err := fmw.ClientSet.CoreV1().Pods(fmw.Namespace.Name).Create(spec)
	framework.ExpectNoError(err, "pod Create API error")
	return created
}

// waitForPod asks the e2e framework to wait, with a 10 second timeout, for
// one pod labelled app=<name> in the test namespace to be running and ready.
// On error it dumps namespace info and failed-container logs, then fails the
// test.
func waitForPod(fmw *framework.Framework, name string) {
	ginkgo.By(fmt.Sprintf("waiting for %s availability", name))

	ns := fmw.Namespace.Name
	selector := labels.Set{"app": name}.AsSelector()

	_, err := e2epod.WaitForPodsWithLabelRunningReady(fmw.ClientSet, ns, selector, 1, 10*time.Second)
	if err == nil {
		return
	}

	// Collect diagnostics before reporting the failure.
	framework.DumpAllNamespaceInfo(fmw.ClientSet, ns)
	framework.LogFailedContainers(fmw.ClientSet, ns, framework.Logf)
	framework.Failf("unable to wait for all pods to be running and ready: %v", err)
}