e2e: add a test case for rbd-nbd mounter #1839

Merged: 6 commits, May 26, 2021
51 changes: 43 additions & 8 deletions e2e/pod.go
@@ -105,17 +105,40 @@ func waitForDeploymentComplete(name, ns string, c kubernetes.Interface, t int) e
return nil
}

func getCommandInPodOpts(f *framework.Framework, c, ns string, opt *metav1.ListOptions) (framework.ExecOptions, error) {
func findPodAndContainerName(f *framework.Framework, ns, cn string, opt *metav1.ListOptions) (string, string, error) {
podList, err := f.PodClientNS(ns).List(context.TODO(), *opt)
if err != nil {
return "", "", err
}

if len(podList.Items) == 0 {
return "", "", errors.New("podlist is empty")
}

if cn != "" {
for i := range podList.Items {
Collaborator: found is only used if cn is not empty; can we move found := false inside the if check?
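A minimal sketch of the restructuring being suggested, assuming an earlier revision of this PR declared a found flag at function scope; since the flag only matters when cn is non-empty, it can live inside the if block (the merged version shown in this diff drops the flag entirely):

if cn != "" {
	// found is only meaningful when a specific container was requested.
	found := false
	for i := range podList.Items {
		for j := range podList.Items[i].Spec.Containers {
			if podList.Items[i].Spec.Containers[j].Name == cn {
				found = true
				break
			}
		}
		if found {
			return podList.Items[i].Name, cn, nil
		}
	}
	return "", "", errors.New("container name not found")
}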

for j := range podList.Items[i].Spec.Containers {
if podList.Items[i].Spec.Containers[j].Name == cn {
return podList.Items[i].Name, cn, nil
}
}
}
return "", "", errors.New("container name not found")
}
return podList.Items[0].Name, podList.Items[0].Spec.Containers[0].Name, nil
}

func getCommandInPodOpts(f *framework.Framework, c, ns, cn string, opt *metav1.ListOptions) (framework.ExecOptions, error) {
cmd := []string{"/bin/sh", "-c", c}
pods, err := listPods(f, ns, opt)
pName, cName, err := findPodAndContainerName(f, ns, cn, opt)
if err != nil {
return framework.ExecOptions{}, err
}
return framework.ExecOptions{
Command: cmd,
PodName: pods[0].Name,
PodName: pName,
Namespace: ns,
ContainerName: pods[0].Spec.Containers[0].Name,
ContainerName: cName,
Stdin: nil,
CaptureStdout: true,
CaptureStderr: true,
@@ -170,7 +193,19 @@ func listPods(f *framework.Framework, ns string, opt *metav1.ListOptions) ([]v1.
}

func execCommandInPod(f *framework.Framework, c, ns string, opt *metav1.ListOptions) (string, string, error) {
podOpt, err := getCommandInPodOpts(f, c, ns, opt)
podOpt, err := getCommandInPodOpts(f, c, ns, "", opt)
if err != nil {
return "", "", err
}
stdOut, stdErr, err := f.ExecWithOptions(podOpt)
if stdErr != "" {
e2elog.Logf("stdErr occurred: %v", stdErr)
}
return stdOut, stdErr, err
}

func execCommandInContainer(f *framework.Framework, c, ns, cn string, opt *metav1.ListOptions) (string, string, error) { //nolint:unparam,lll // cn can be used with different inputs later
Member: We usually put these comments above the affected line, not after it (this line becomes really long now).

Author: I thought putting it above the line would affect the whole function block, not just that line in this case?

Member: Probably, but that is not really an issue. In general, all nolint issues need to be addressed at some point: either the argument is not needed and can be dropped, or the function gets used with other arguments somewhere.
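For reference, a sketch of the two placements being discussed, assuming golangci-lint's documented behavior: a //nolint directive on its own line immediately above a declaration applies to the whole block that follows, whereas the trailing form used in the merged line only covers that single, very long line.

// Placed above the declaration: suppresses unparam and lll for the whole function.
//nolint:unparam,lll // cn can be used with different inputs later
func execCommandInContainer(f *framework.Framework, c, ns, cn string, opt *metav1.ListOptions) (string, string, error) {
	// function body unchanged; stubbed out in this sketch
	return "", "", nil
}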

podOpt, err := getCommandInPodOpts(f, c, ns, cn, opt)
if err != nil {
return "", "", err
}
@@ -185,7 +220,7 @@ func execCommandInToolBoxPod(f *framework.Framework, c, ns string) (string, stri
opt := &metav1.ListOptions{
LabelSelector: rookToolBoxPodLabel,
}
podOpt, err := getCommandInPodOpts(f, c, ns, opt)
podOpt, err := getCommandInPodOpts(f, c, ns, "", opt)
if err != nil {
return "", "", err
}
@@ -197,7 +232,7 @@ func execCommandInToolBoxPod(f *framework.Framework, c, ns string) (string, stri
}

func execCommandInPodAndAllowFail(f *framework.Framework, c, ns string, opt *metav1.ListOptions) (string, string) {
podOpt, err := getCommandInPodOpts(f, c, ns, opt)
podOpt, err := getCommandInPodOpts(f, c, ns, "", opt)
if err != nil {
return "", err.Error()
}
@@ -269,7 +304,7 @@ func deletePod(name, ns string, c kubernetes.Interface, t int) error {
})
}

func deletePodWithLabel(label, ns string, skipNotFound bool) error {
func deletePodWithLabel(label, ns string, skipNotFound bool) error { //nolint:unparam // skipNotFound can be used with different inputs later
_, err := framework.RunKubectl(ns, "delete", "po", "-l", label, fmt.Sprintf("--ignore-not-found=%t", skipNotFound))
if err != nil {
e2elog.Logf("failed to delete pod %v", err)
278 changes: 267 additions & 11 deletions e2e/rbd.go
@@ -5,6 +5,7 @@ import (
"fmt"
"io/ioutil"
"os"
"regexp"
"strings"

. "github.com/onsi/ginkgo" // nolint
@@ -349,6 +350,250 @@ var _ = Describe("RBD", func() {
}
})

By("create a PVC and bind it to an app using rbd-nbd mounter", func() {
err := deleteResource(rbdExamplePath + "storageclass.yaml")
if err != nil {
e2elog.Failf("failed to delete storageclass with error %v", err)
}
err = createRBDStorageClass(f.ClientSet, f, nil, map[string]string{"mounter": "rbd-nbd"}, deletePolicy)
if err != nil {
e2elog.Failf("failed to create storageclass with error %v", err)
}
err = validatePVCAndAppBinding(pvcPath, appPath, f)
if err != nil {
e2elog.Failf("failed to validate pvc and application binding with error %v", err)
}
// validate created backend rbd images
validateRBDImageCount(f, 0)
err = deleteResource(rbdExamplePath + "storageclass.yaml")
if err != nil {
e2elog.Failf("failed to delete storageclass with error %v", err)
}
err = createRBDStorageClass(f.ClientSet, f, nil, nil, deletePolicy)
if err != nil {
e2elog.Failf("failed to create storageclass with error %v", err)
}
})

By("perform IO on rbd-nbd volume after nodeplugin restart and expect a failure", func() {
err := deleteResource(rbdExamplePath + "storageclass.yaml")
if err != nil {
e2elog.Failf("failed to delete storageclass with error %v", err)
}
// Storage class with rbd-nbd mounter
err = createRBDStorageClass(f.ClientSet, f, nil, map[string]string{"mounter": "rbd-nbd"}, deletePolicy)
if err != nil {
e2elog.Failf("failed to create storageclass with error %v", err)
}
pvc, err := loadPVC(pvcPath)
if err != nil {
e2elog.Failf("failed to load PVC with error %v", err)
}
pvc.Namespace = f.UniqueName

app, err := loadApp(appPath)
if err != nil {
e2elog.Failf("failed to load application with error %v", err)
}

app.Namespace = f.UniqueName
label := map[string]string{
"app": app.Name,
}
app.Labels = label
app.Spec.Volumes[0].PersistentVolumeClaim.ClaimName = pvc.Name
err = createPVCAndApp("", f, pvc, app, deployTimeout)
if err != nil {
e2elog.Failf("failed to create PVC and application with error %v", err)
}

// validate created backend rbd images
validateRBDImageCount(f, 1)

selector, err := getDaemonSetLabelSelector(f, cephCSINamespace, rbdDaemonsetName)
if err != nil {
e2elog.Failf("failed to get the labels with error %v", err)
}
// delete rbd nodeplugin pods
err = deletePodWithLabel(selector, cephCSINamespace, false)
if err != nil {
e2elog.Failf("fail to delete pod with error %v", err)
}

// wait for nodeplugin pods to come up
err = waitForDaemonSets(rbdDaemonsetName, cephCSINamespace, f.ClientSet, deployTimeout)
if err != nil {
e2elog.Failf("timeout waiting for daemonset pods with error %v", err)
}

opt := metav1.ListOptions{
LabelSelector: fmt.Sprintf("app=%s", app.Name),
}

// FIXME: Fix this behavior, i.e. when the nodeplugin is
// restarted, the rbd-nbd processes should be back to life
// as rbd-nbd processes are responsible for IO

// For now to prove this isn't working, write something to
// mountpoint and expect a failure as the processes are terminated.
filePath := app.Spec.Containers[0].VolumeMounts[0].MountPath + "/test"
_, stdErr := execCommandInPodAndAllowFail(f, fmt.Sprintf("echo 'Hello World' > %s", filePath), app.Namespace, &opt)
IOErr := fmt.Sprintf("cannot create %s: Input/output error", filePath)
Collaborator: Add a FIXME for this one, as this won't be true once we implement logic to handle daemonset pod restarts?

Author: sure
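A sketch of the marker being requested, with purely illustrative wording; the point is that the error expectation below is temporary and should flip to asserting a successful write once daemonset-restart handling for rbd-nbd lands:

// FIXME: once the nodeplugin brings the rbd-nbd processes back after a
// restart, this write should succeed; drop the Input/output error check
// and assert a clean write instead.
IOErr := fmt.Sprintf("cannot create %s: Input/output error", filePath)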

if !strings.Contains(stdErr, IOErr) {
e2elog.Failf(stdErr)
} else {
e2elog.Logf("failed IO as expected: %v", stdErr)
}

err = deletePVCAndApp("", f, pvc, app)
if err != nil {
e2elog.Failf("failed to delete PVC and application with error %v", err)
}
// validate created backend rbd images
validateRBDImageCount(f, 0)
Collaborator: Delete the storageclass here, as it is specific to rbd-nbd; a krbd test added in between might fail because of it. Recreate it in the next By block if you want?

Author: sure

Author: done

err = deleteResource(rbdExamplePath + "storageclass.yaml")
if err != nil {
e2elog.Failf("failed to delete storageclass with error %v", err)
}
err = createRBDStorageClass(f.ClientSet, f, nil, nil, deletePolicy)
if err != nil {
e2elog.Failf("failed to create storageclass with error %v", err)
}
})

By("restart rbd-nbd process on nodeplugin and continue IO after nodeplugin restart", func() {
Collaborator: Just a question: is this whole test case required? We are doing some hacks to remount the rbd image, which is not the case in the real world; this is expected to be handled automatically by cephcsi or other sidecars in the daemonset pod.

Author: The subject of the tests remains the same after the automation as well; we would just remove the lines that manually bring the process back online and replace them with the steps that bring it back online in the background. In any case, having this in the CI until then will help us understand regressions and true failures within rbd-nbd.

Collaborator: Currently this PR adds two tests, "perform IO on rbd-nbd volume after nodeplugin restart and expect a failure" and "restart rbd-nbd process on nodeplugin and continue IO after nodeplugin restart". When we bring in the automatic handling of the restart, one test will be removed and the other modified to check that we are able to read/write the data? IMO having a single test seems fine to me. Let's hear from others.

Author: These are POC-level tests; someone who is new to the rbd-nbd mounter can understand the problem we are dealing with here very clearly with the help of these test cases. In the future, when the real story is working, we can clear out these bits and write more useful test cases. Until then, the intention of these tests is to catch regressions or failures in rbd-nbd only.

err := deleteResource(rbdExamplePath + "storageclass.yaml")
if err != nil {
e2elog.Failf("failed to delete storageclass with error %v", err)
}
// Tweak Storageclass to add netlink,reattach rbd-nbd mounter options
scOpts := map[string]string{
"mounter": "rbd-nbd",
"mapOptions": "try-netlink,reattach-timeout=180",
}
err = createRBDStorageClass(f.ClientSet, f, nil, scOpts, deletePolicy)
if err != nil {
e2elog.Failf("failed to create storageclass with error %v", err)
}

pvc, err := loadPVC(pvcPath)
if err != nil {
e2elog.Failf("failed to load PVC with error %v", err)
}
pvc.Namespace = f.UniqueName

app, err := loadApp(appPath)
if err != nil {
e2elog.Failf("failed to load application with error %v", err)
}
app.Namespace = f.UniqueName
label := map[string]string{
"app": app.Name,
}
app.Labels = label
app.Spec.Volumes[0].PersistentVolumeClaim.ClaimName = pvc.Name
err = createPVCAndApp("", f, pvc, app, deployTimeout)
if err != nil {
e2elog.Failf("failed to create PVC and application with error %v", err)
}
// validate created backend rbd images
validateRBDImageCount(f, 1)

selector, err := getDaemonSetLabelSelector(f, cephCSINamespace, rbdDaemonsetName)
if err != nil {
e2elog.Failf("failed to get the labels with error %v", err)
}

opt := metav1.ListOptions{
LabelSelector: selector,
}

uname, stdErr, err := execCommandInContainer(f, "uname -a", cephCSINamespace, "csi-rbdplugin", &opt)
if err != nil || stdErr != "" {
e2elog.Failf("failed to run uname cmd : %v, stdErr: %v ", err, stdErr)
}
e2elog.Logf("uname -a: %v", uname)
rpmv, stdErr, err := execCommandInContainer(f, "rpm -qa | grep rbd-nbd", cephCSINamespace, "csi-rbdplugin", &opt)
if err != nil || stdErr != "" {
e2elog.Failf("failed to run rpm -qa cmd : %v, stdErr: %v ", err, stdErr)
}
e2elog.Logf("rbd-nbd package version: %v", rpmv)

// Get details of rbd-nbd process
// # ps -eo 'cmd' | grep [r]bd-nbd
// /usr/bin/rbd-nbd --id cephcsi-rbd-node -m svc-name:6789 --keyfile=/tmp/csi/keys/keyfile attach --device /dev/nbd0 pool-name/image-name --try-netlink --reattach-timeout=180
mapCmd, stdErr, err := execCommandInContainer(f, "ps -eo 'cmd' | grep [r]bd-nbd", cephCSINamespace, "csi-rbdplugin", &opt)
if err != nil || stdErr != "" {
e2elog.Failf("failed to run ps cmd : %v, stdErr: %v ", err, stdErr)
}
e2elog.Logf("map command running before restart, mapCmd: %v", mapCmd)

rbdNodeKey, stdErr, err := execCommandInToolBoxPod(f, "ceph auth get-key client.cephcsi-rbd-node", rookNamespace)
if err != nil || stdErr != "" {
e2elog.Failf("error getting cephcsi-rbd-node key, err: %v, stdErr: %v ", err, stdErr)
}

// restart the rbd node plugin
err = deletePodWithLabel(selector, cephCSINamespace, false)
if err != nil {
e2elog.Failf("fail to delete pod with error %v", err)
}

// wait for nodeplugin pods to come up
err = waitForDaemonSets(rbdDaemonsetName, cephCSINamespace, f.ClientSet, deployTimeout)
if err != nil {
e2elog.Failf("timeout waiting for daemonset pods with error %v", err)
}

// Prepare the rbd-nbd with command args
attachCmd := strings.ReplaceAll(mapCmd, "map", "attach --device /dev/nbd0")
m1 := regexp.MustCompile(`/keyfile-[0-9]* `)
attachCmd = m1.ReplaceAllString(attachCmd, "/keyfile-test ")
e2elog.Logf("attach command to run after restart, attachCmd: %v", attachCmd)

// create the keyfile
_, stdErr, err = execCommandInContainer(f, fmt.Sprintf("echo %s > /tmp/csi/keys/keyfile-test", rbdNodeKey), cephCSINamespace, "csi-rbdplugin", &opt)
if err != nil || stdErr != "" {
e2elog.Failf("failed to write key to a file, err: %v, stdErr: %v ", err, stdErr)
}

_, stdErr, err = execCommandInContainer(f, attachCmd, cephCSINamespace, "csi-rbdplugin", &opt)
if err != nil || stdErr != "" {
e2elog.Failf("failed to run attach cmd err: %v, stdErr: %v ", err, stdErr)
}

runningAttachCmd, stdErr, err := execCommandInContainer(f, "ps -eo 'cmd' | grep [r]bd-nbd", cephCSINamespace, "csi-rbdplugin", &opt)
if err != nil || stdErr != "" {
e2elog.Failf("failed to run ps cmd : %v, stdErr: %v ", err, stdErr)
}
e2elog.Logf("attach command running after restart, runningAttachCmd: %v", runningAttachCmd)

appOpt := metav1.ListOptions{
LabelSelector: fmt.Sprintf("app=%s", app.Name),
}
// Write something to mountpoint and expect it to happen
filePath := app.Spec.Containers[0].VolumeMounts[0].MountPath + "/test"
_, stdErr, err = execCommandInPod(f, fmt.Sprintf("echo 'Hello World' > %s", filePath), app.Namespace, &appOpt)
if err != nil || stdErr != "" {
e2elog.Failf("failed to write IO, err: %v, stdErr: %v ", err, stdErr)
}

err = deletePVCAndApp("", f, pvc, app)
if err != nil {
e2elog.Failf("failed to delete PVC and application with error %v", err)
}
// validate created backend rbd images
validateRBDImageCount(f, 0)
err = deleteResource(rbdExamplePath + "storageclass.yaml")
if err != nil {
e2elog.Failf("failed to delete storageclass with error %v", err)
}
err = createRBDStorageClass(f.ClientSet, f, nil, nil, deletePolicy)
if err != nil {
e2elog.Failf("failed to create storageclass with error %v", err)
}
})

By("create a PVC and bind it to an app with encrypted RBD volume", func() {
err := deleteResource(rbdExamplePath + "storageclass.yaml")
if err != nil {
@@ -482,17 +727,28 @@ var _ = Describe("RBD", func() {
}
})

// TODO: enable this test when we support rbd-nbd mounter in E2E.
// nbd module should be present on the host machine to run use the
// rbd-nbd mounter.

// By("create a PVC and Bind it to an app with journaling/exclusive-lock image-features and rbd-nbd mounter", func() {
// deleteResource(rbdExamplePath + "storageclass.yaml")
// createRBDStorageClass(f.ClientSet, f, nil, map[string]string{"imageFeatures": "layering,journaling,exclusive-lock", "mounter": "rbd-nbd"})
// validatePVCAndAppBinding(pvcPath, appPath, f)
// deleteResource(rbdExamplePath + "storageclass.yaml")
// createRBDStorageClass(f.ClientSet, f, nil, make(map[string]string))
// })
By("create a PVC and Bind it to an app with journaling/exclusive-lock image-features and rbd-nbd mounter", func() {
err := deleteResource(rbdExamplePath + "storageclass.yaml")
if err != nil {
e2elog.Failf("failed to delete storageclass with error %v", err)
}
err = createRBDStorageClass(f.ClientSet, f, nil, map[string]string{"imageFeatures": "layering,journaling,exclusive-lock", "mounter": "rbd-nbd"}, deletePolicy)
if err != nil {
e2elog.Failf("failed to create storageclass with error %v", err)
}
err = validatePVCAndAppBinding(pvcPath, appPath, f)
if err != nil {
e2elog.Failf("failed to validate pvc and application binding with error %v", err)
}
err = deleteResource(rbdExamplePath + "storageclass.yaml")
if err != nil {
e2elog.Failf("failed to delete storageclass with error %v", err)
}
err = createRBDStorageClass(f.ClientSet, f, nil, nil, deletePolicy)
if err != nil {
e2elog.Failf("failed to create storageclass with error %v", err)
}
})

By("create a PVC clone and bind it to an app", func() {
// snapshot beta is only supported from v1.17+