Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[cherry-pick]fix the driver pod can not be created due to unreasonable admit #2081

Merged
merged 1 commit into from
Mar 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 7 additions & 12 deletions pkg/webhooks/admission/pods/validate/admit_pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,8 @@ func AdmitPods(ar admissionv1.AdmissionReview) *admissionv1.AdmissionResponse {
/*
allow pods to create when
1. schedulerName of pod isn't volcano
2. pod has Podgroup whose phase isn't Pending
3. normal pods whose schedulerName is volcano don't have podgroup.
4. check pod budget annotations configure
2. normal pods whose schedulerName is volcano don't have podgroup.
3. check pod budget annotations configure
*/
func validatePod(pod *v1.Pod, reviewResponse *admissionv1.AdmissionResponse) string {
if pod.Spec.SchedulerName != config.SchedulerName {
Expand All @@ -113,7 +112,7 @@ func validatePod(pod *v1.Pod, reviewResponse *admissionv1.AdmissionResponse) str
pgName = pod.Annotations[vcv1beta1.KubeGroupNameAnnotationKey]
}
if pgName != "" {
if err := checkPGPhase(pod, pgName, true); err != nil {
if err := checkPG(pod, pgName, true); err != nil {
msg = err.Error()
reviewResponse.Allowed = false
}
Expand All @@ -122,7 +121,7 @@ func validatePod(pod *v1.Pod, reviewResponse *admissionv1.AdmissionResponse) str

// normal pod, SN == volcano
pgName = helpers.GeneratePodgroupName(pod)
if err := checkPGPhase(pod, pgName, false); err != nil {
if err := checkPG(pod, pgName, false); err != nil {
msg = err.Error()
reviewResponse.Allowed = false
}
Expand All @@ -136,19 +135,15 @@ func validatePod(pod *v1.Pod, reviewResponse *admissionv1.AdmissionResponse) str
return msg
}

func checkPGPhase(pod *v1.Pod, pgName string, isVCJob bool) error {
pg, err := config.VolcanoClient.SchedulingV1beta1().PodGroups(pod.Namespace).Get(context.TODO(), pgName, metav1.GetOptions{})
func checkPG(pod *v1.Pod, pgName string, isVCJob bool) error {
_, err := config.VolcanoClient.SchedulingV1beta1().PodGroups(pod.Namespace).Get(context.TODO(), pgName, metav1.GetOptions{})
if err != nil {
if isVCJob || (!isVCJob && !apierrors.IsNotFound(err)) {
return fmt.Errorf("failed to get PodGroup for pod <%s/%s>: %v", pod.Namespace, pod.Name, err)
}
return nil
}
if pg.Status.Phase != vcv1beta1.PodGroupPending {
return nil
}
return fmt.Errorf("failed to create pod <%s/%s> as the podgroup phase is Pending",
pod.Namespace, pod.Name)
return nil
}

func validateAnnotation(pod *v1.Pod) error {
Expand Down
47 changes: 0 additions & 47 deletions pkg/webhooks/admission/pods/validate/admit_pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ func TestValidatePod(t *testing.T) {

namespace := "test"
pgName := "podgroup-p1"
isController := true

testCases := []struct {
Name string
Expand Down Expand Up @@ -64,52 +63,6 @@ func TestValidatePod(t *testing.T) {
ret: "",
ExpectErr: false,
},
// validate normal pod with volcano scheduler
{
Name: "validate volcano-scheduler normal pod",
Pod: v1.Pod{
TypeMeta: metav1.TypeMeta{
APIVersion: "v1",
Kind: "Pod",
},
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Name: "normal-pod-2",
OwnerReferences: []metav1.OwnerReference{
{UID: "p1", Controller: &isController},
},
},
Spec: v1.PodSpec{
SchedulerName: "volcano",
},
},

reviewResponse: admissionv1.AdmissionResponse{Allowed: false},
ret: "failed to create pod <test/normal-pod-2> as the podgroup phase is Pending",
ExpectErr: true,
},
// validate volcano pod with volcano scheduler
{
Name: "validate volcano-scheduler volcano pod",
Pod: v1.Pod{
TypeMeta: metav1.TypeMeta{
APIVersion: "v1",
Kind: "Pod",
},
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Name: "volcano-pod-1",
Annotations: map[string]string{vcschedulingv1.KubeGroupNameAnnotationKey: pgName},
},
Spec: v1.PodSpec{
SchedulerName: "volcano",
},
},

reviewResponse: admissionv1.AdmissionResponse{Allowed: false},
ret: "failed to create pod <test/volcano-pod-1> as the podgroup phase is Pending",
ExpectErr: true,
},
// validate volcano pod with volcano scheduler when get pg failed
{
Name: "validate volcano volcano pod when get pg failed",
Expand Down
4 changes: 2 additions & 2 deletions test/e2e/jobp/admission.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ var _ = ginkgo.Describe("Job E2E Test: Test Admission service", func() {
gomega.Expect(err).NotTo(gomega.HaveOccurred())
})

ginkgo.It("Can't create volcano pod when podgroup is Pending", func() {
ginkgo.It("Allow to create pod when podgroup is Pending", func() {
podName := "pod-volcano"
pgName := "pending-pg"
ctx := e2eutil.InitTestContext(e2eutil.Options{})
Expand Down Expand Up @@ -293,7 +293,7 @@ var _ = ginkgo.Describe("Job E2E Test: Test Admission service", func() {
gomega.Expect(err).NotTo(gomega.HaveOccurred())

_, err = ctx.Kubeclient.CoreV1().Pods(ctx.Namespace).Create(context.TODO(), pod, v1.CreateOptions{})
gomega.Expect(err.Error()).Should(gomega.ContainSubstring(`the podgroup phase is Pending`))
gomega.Expect(err).NotTo(gomega.HaveOccurred())
})

ginkgo.It("Job mutate check", func() {
Expand Down
8 changes: 6 additions & 2 deletions test/e2e/schedulingaction/preempt.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ var _ = Describe("Job E2E Test", func() {
Expect(err).NotTo(HaveOccurred())
})

It("preemption doesn't work when podgroup is pending", func() {
It("preemption doesn't work when podgroup is pending due to insufficient resource", func() {
ctx := e2eutil.InitTestContext(e2eutil.Options{
PriorityClasses: map[string]int32{
highPriority: highPriorityValue,
Expand Down Expand Up @@ -175,8 +175,12 @@ var _ = Describe("Job E2E Test", func() {
PriorityClassName: highPriority,
},
}
// Pod is allowed to be created, preemption does not happen due to PodGroup is in pending state
_, err = ctx.Kubeclient.CoreV1().Pods(ctx.Namespace).Create(context.TODO(), pod, v1.CreateOptions{})
Expect(err).To(HaveOccurred())
Expect(err).NotTo(HaveOccurred())
// Make sure preempteeJob is not preempted as expected
err = e2eutil.WaitTasksReady(ctx, preempteeJob, int(rep))
Expect(err).NotTo(HaveOccurred())
})

It("preemption only works in the same queue", func() {
Expand Down