Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

e2e: add reclaim cases and fix placeholder nits #906

Merged
merged 3 commits into from
Jul 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
338 changes: 301 additions & 37 deletions test/e2e/reclaim.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,252 @@ import (

. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
batchv1alpha1 "volcano.sh/volcano/pkg/apis/batch/v1alpha1"
schedulingv1beta1 "volcano.sh/volcano/pkg/apis/scheduling/v1beta1"
)

var _ = Describe("Queue E2E Test", func() {
It("Reclaim: New queue with job created no reclaim when resource is enough", func() {
q1 := "default"
var _ = Describe("Reclaim E2E Test", func() {

CreateReclaimJob := func(ctx *testContext, req v1.ResourceList, name string, queue string, pri string) (*batchv1alpha1.Job, error) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's move this function out

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is only use in reclaim, seems not suitable to move to utils.

job := &jobSpec{
tasks: []taskSpec{
{
img: defaultNginxImage,
req: req,
min: 1,
rep: 1,
},
},
name: name,
queue: queue,
}
if pri != "" {
job.pri = pri
}
batchJob, err := createJobInner(ctx, job)
if err != nil {
return nil, err
}
err = waitTasksReady(ctx, batchJob, 1)
return batchJob, err
}

WaitQueueStatus := func(ctx *testContext, status string, num int32, queue string) error {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what the num param stands for

err := waitQueueStatus(func() (bool, error) {
queue, err := ctx.vcclient.SchedulingV1beta1().Queues().Get(context.TODO(), queue, metav1.GetOptions{})
Expect(err).NotTo(HaveOccurred(), "Get queue %s failed", queue)
switch status {
case "Running":
return queue.Status.Running == num, nil
case "Open":
return queue.Status.State == schedulingv1beta1.QueueStateOpen, nil
case "Pending":
return queue.Status.Pending == num, nil
default:
return false, nil
}
})
return err
}

It("Reclaim Case 1: New queue with job created no reclaim when resource is enough", func() {
q1 := defaultQueue
q2 := "reclaim-q2"
ctx := initTestContext(options{
queues: []string{q2},
nodesNumLimit: 4,
nodesResourceLimit: CPU1Mem1,
})

defer cleanupTestContext(ctx)

By("Setup initial jobs")

_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", q1, "")
hzxuzhonghu marked this conversation as resolved.
Show resolved Hide resolved
Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")

_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", q2, "")
Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")

By("Create new comming queue and job")
q3 := "reclaim-q3"
ctx.queues = append(ctx.queues, q3)
createQueues(ctx)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will fail if create a queue that already exists unless you changed it as well

Copy link
Contributor Author

@alcorj-mizar alcorj-mizar Jul 6, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I change the createQueues function in previous pr, it will skip the exists queue and will not fail.


err = WaitQueueStatus(ctx, "Open", 1, q1)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open")

_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j3", q3, "")
Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed")

By("Make sure all job running")

err = WaitQueueStatus(ctx, "Running", 1, q1)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

err = WaitQueueStatus(ctx, "Running", 1, q2)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

err = WaitQueueStatus(ctx, "Running", 1, q3)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

})

It("Reclaim Case 3: New queue with job created no reclaim when job.podGroup.Status.Phase pending", func() {
q1 := defaultQueue
q2 := "reclaim-q2"
j1 := "reclaim-j1"
j2 := "reclaim-j2"
j3 := "reclaim-j3"

ctx := initTestContext(options{
queues: []string{q2},
nodesNumLimit: 3,
nodesResourceLimit: CPU1Mem1,
priorityClasses: map[string]int32{
"low-priority": 10,
"high-priority": 10000,
},
})

defer cleanupTestContext(ctx)

By("Setup initial jobs")

_, err := CreateReclaimJob(ctx, CPU1Mem1, j1, q1, "")
Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")

_, err = CreateReclaimJob(ctx, CPU1Mem1, j2, q2, "")
Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")

By("Create new comming queue and job")
q3 := "reclaim-q3"
ctx.queues = append(ctx.queues, q3)
createQueues(ctx)

err = WaitQueueStatus(ctx, "Open", 1, q1)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open")

_, err = CreateReclaimJob(ctx, CPU1Mem1, j3, q3, "")
Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed")

// delete pod of job3 to make sure reclaim-j3 podgroup is pending
listOptions := metav1.ListOptions{
LabelSelector: labels.Set(map[string]string{batchv1alpha1.JobNameKey: j3}).String(),
}

job3pods, err := ctx.kubeclient.CoreV1().Pods(ctx.namespace).List(context.TODO(), listOptions)
Expect(err).NotTo(HaveOccurred(), "Get %s pod failed", j3)

By("Make sure q1 q2 with job running in it.")
err = WaitQueueStatus(ctx, "Running", 1, q1)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

err = WaitQueueStatus(ctx, "Running", 1, q2)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

for _, pod := range job3pods.Items {
fmt.Println(pod.Name)
err = ctx.kubeclient.CoreV1().Pods(pod.Namespace).Delete(context.TODO(), pod.Name, metav1.DeleteOptions{})
Expect(err).NotTo(HaveOccurred(), "Failed to delete pod %s", pod.Name)
}

By("Q3 pending when we delete it.")
err = WaitQueueStatus(ctx, "Pending", 1, q3)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue pending")
})

It("Reclaim Case 4: New queue with job created no reclaim when new queue is not created", func() {
q1 := defaultQueue
q2 := "reclaim-q2"
ctx := initTestContext(options{
queues: []string{q2},
nodesNumLimit: 3,
nodesResourceLimit: CPU1Mem1,
priorityClasses: map[string]int32{
"low-priority": 10,
"high-priority": 10000,
},
})

defer cleanupTestContext(ctx)

By("Setup initial jobs")

_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", q1, "")
Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")

_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", q2, "")
Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")

By("Create new comming job")
q3 := "reclaim-q3"

_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j3", q3, "")
Expect(err).Should(HaveOccurred(), "job3 create failed when queue3 is not created")

By("Make sure all job running")

err = WaitQueueStatus(ctx, "Running", 1, q1)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

err = WaitQueueStatus(ctx, "Running", 1, q2)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
})

// As we agreed, this is not intended behavior, actually, it is a bug.
It("Reclaim Case 5: New queue with job created no reclaim when job or task is low-priority", func() {
q1 := defaultQueue
q2 := "reclaim-q2"
ctx := initTestContext(options{
queues: []string{q2},
nodesNumLimit: 3,
nodesResourceLimit: CPU1Mem1,
priorityClasses: map[string]int32{
"low-priority": 10,
"high-priority": 10000,
},
})

defer cleanupTestContext(ctx)

By("Setup initial jobs")

_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", q1, "high-priority")
Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")

_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", q2, "high-priority")
Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")

By("Create new comming queue and job")
q3 := "reclaim-q3"

err = WaitQueueStatus(ctx, "Open", 1, q1)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open")

_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j3", q3, "low-priority")
Expect(err).Should(HaveOccurred(), "job3 create failed when queue3 is not created")

By("Make sure all job running")

err = WaitQueueStatus(ctx, "Running", 1, q1)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

err = WaitQueueStatus(ctx, "Running", 1, q2)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
})

It("Reclaim Case 6: New queue with job created no reclaim when overused", func() {
q1 := defaultQueue
q2 := "reclaim-q2"
q3 := "reclaim-q3"
ctx := initTestContext(options{
queues: []string{q2, q3},
nodesNumLimit: 3,
nodesResourceLimit: CPU1Mem1,
priorityClasses: map[string]int32{
"low-priority": 10,
"high-priority": 10000,
Expand All @@ -27,6 +261,17 @@ var _ = Describe("Queue E2E Test", func() {
defer cleanupTestContext(ctx)

By("Setup initial jobs")

_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", q1, "")
Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")

_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", q2, "")
Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")

_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j3", q3, "")
Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed")

By("Create job4 to testing overused cases.")
job := &jobSpec{
tasks: []taskSpec{
{
Expand All @@ -36,63 +281,82 @@ var _ = Describe("Queue E2E Test", func() {
rep: 1,
},
},
name: "reclaim-j4",
queue: q3,
}

job.name = "reclaim-j1"
job.queue = q1
job.pri = "low-priority"
job1 := createJob(ctx, job)
createJob(ctx, job)

By("Make sure all job running")

job.name = "reclaim-j2"
job.queue = q2
job.pri = "low-priority"
job2 := createJob(ctx, job)
err = WaitQueueStatus(ctx, "Running", 1, q1)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

err = WaitQueueStatus(ctx, "Running", 1, q2)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

err = WaitQueueStatus(ctx, "Running", 1, q3)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

err = WaitQueueStatus(ctx, "Pending", 1, q3)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue pending")
})

It("Reclaim Case 8: New queue with job created no reclaim when task resources less than reclaimable resource", func() {
q1 := defaultQueue
q2 := "reclaim-q2"
ctx := initTestContext(options{
queues: []string{q2},
nodesNumLimit: 3,
nodesResourceLimit: CPU1Mem1,
priorityClasses: map[string]int32{
"low-priority": 10,
"high-priority": 10000,
},
})

defer cleanupTestContext(ctx)

By("Setup initial jobs")

err := waitTasksReady(ctx, job1, 1)
_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", q1, "")
Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")

err = waitTasksReady(ctx, job2, 1)
_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", q2, "")
Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")

By("Create new comming queue and job")
q3 := "reclaim-q3"
ctx.queues = append(ctx.queues, q3)
createQueues(ctx)

err = waitQueueStatus(func() (bool, error) {
queue, err := ctx.vcclient.SchedulingV1beta1().Queues().Get(context.TODO(), q1, metav1.GetOptions{})
Expect(err).NotTo(HaveOccurred(), "Get queue %s failed", q1)
return queue.Status.State == schedulingv1beta1.QueueStateOpen, nil
})
err = WaitQueueStatus(ctx, "Open", 1, q1)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open")

job.name = "reclaim-j3"
job.queue = q3
job.pri = "low-priority"
job := &jobSpec{
tasks: []taskSpec{
{
img: defaultNginxImage,
req: CPU4Mem4,
min: 1,
rep: 1,
},
},
name: "reclaim-j4",
queue: q3,
}
createJob(ctx, job)

By("Make sure all job running")
err = waitQueueStatus(func() (bool, error) {
queue, err := ctx.vcclient.SchedulingV1beta1().Queues().Get(context.TODO(), q1, metav1.GetOptions{})
Expect(err).NotTo(HaveOccurred(), "Get queue %s failed", q1)
return queue.Status.Running == 1, nil
})
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

err = waitQueueStatus(func() (bool, error) {
queue, err := ctx.vcclient.SchedulingV1beta1().Queues().Get(context.TODO(), q2, metav1.GetOptions{})
Expect(err).NotTo(HaveOccurred(), "Get queue %s failed", q2)
return queue.Status.Running == 1, nil
})
err = WaitQueueStatus(ctx, "Running", 1, q1)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

err = waitQueueStatus(func() (bool, error) {
queue, err := ctx.vcclient.SchedulingV1beta1().Queues().Get(context.TODO(), q3, metav1.GetOptions{})
Expect(err).NotTo(HaveOccurred(), "Get queue %s failed", q3)
return queue.Status.Running == 1, nil
})
err = WaitQueueStatus(ctx, "Running", 1, q2)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

err = WaitQueueStatus(ctx, "Pending", 1, q3)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
})

It("Reclaim", func() {
Expand Down
Loading