diff --git a/api/jobset/v1alpha2/jobset_types.go b/api/jobset/v1alpha2/jobset_types.go index 5f43f277..c0067452 100644 --- a/api/jobset/v1alpha2/jobset_types.go +++ b/api/jobset/v1alpha2/jobset_types.go @@ -26,7 +26,12 @@ const ( JobIndexKey string = "jobset.sigs.k8s.io/job-index" JobKey string = "jobset.sigs.k8s.io/job-key" JobNameKey string = "job-name" // TODO(#26): Migrate to the fully qualified label name. - ExclusiveKey string = "alpha.jobset.sigs.k8s.io/exclusive-topology" + // ExclusiveKey is an annotation that can be set on the JobSet or on a ReplicatedJob template. + // If set at the JobSet level, all child jobs from all ReplicatedJobs will be scheduled using exclusive + // job placement per topology group (defined as the label value). + // If set at the ReplicatedJob level, all child jobs from the target ReplicatedJobs will be scheduled + // using exclusive job placement per topology group. + ExclusiveKey string = "alpha.jobset.sigs.k8s.io/exclusive-topology" // NodeSelectorStrategyKey is an annotation that acts as a flag, the value does not matter. // If set, the JobSet controller will automatically inject nodeSelectors for the JobSetNameKey label to // ensure exclusive job placement per topology, instead of injecting pod affinity/anti-affinites for this. diff --git a/pkg/controllers/jobset_controller.go b/pkg/controllers/jobset_controller.go index f3afb7db..1a92ff8b 100644 --- a/pkg/controllers/jobset_controller.go +++ b/pkg/controllers/jobset_controller.go @@ -611,18 +611,14 @@ func constructJob(js *jobset.JobSet, rjob *jobset.ReplicatedJob, jobIdx int) (*b job.Spec.Template.Spec.Subdomain = js.Spec.Network.Subdomain } - // If this job should be exclusive per topology, configure the scheduling constraints accordingly. - if _, ok := js.Annotations[jobset.ExclusiveKey]; ok { - // If user has set the nodeSelectorStrategy annotation flag, add the job name label as a - // nodeSelector, and add a toleration for the no schedule taint. - // The node label and node taint must be added separately by a user/script. - if _, exists := js.Annotations[jobset.NodeSelectorStrategyKey]; exists { - addNamespacedJobNodeSelector(job) - addTaintToleration(job) - } - // Otherwise, we fall back to the default strategy of the pod webhook assigning exclusive affinities - // to the leader pod, preventing follower pod creation until leader pod is scheduled, then - // assigning the follower pods to the same node as the leader pods. + // If this job is using the nodeSelectorStrategy implementation of exclusive placement, + // add the job name label as a nodeSelector, and add a toleration for the no schedule taint. + // The node label and node taint must be added to the nodes separately by a user/script. + _, exclusivePlacement := job.Annotations[jobset.ExclusiveKey] + _, nodeSelectorStrategy := job.Annotations[jobset.NodeSelectorStrategyKey] + if exclusivePlacement && nodeSelectorStrategy { + addNamespacedJobNodeSelector(job) + addTaintToleration(job) } // if Suspend is set, then we assume all jobs will be suspended also. @@ -673,10 +669,21 @@ func labelAndAnnotateObject(obj metav1.Object, js *jobset.JobSet, rjob *jobset.R annotations[jobset.JobIndexKey] = strconv.Itoa(jobIdx) annotations[jobset.JobKey] = jobHashKey(js.Namespace, jobName) - // Only set exclusive key label/annotation on jobs/pods if the parent JobSet - // is using exclusive placement. - if _, exists := js.Annotations[jobset.ExclusiveKey]; exists { - annotations[jobset.ExclusiveKey] = js.Annotations[jobset.ExclusiveKey] + // Check for JobSet level exclusive placement. + if topologyDomain, exists := js.Annotations[jobset.ExclusiveKey]; exists { + annotations[jobset.ExclusiveKey] = topologyDomain + // Check if we are using nodeSelectorStrategy implementation of exclusive placement at the JobSet level. + if value, ok := js.Annotations[jobset.NodeSelectorStrategyKey]; ok { + annotations[jobset.NodeSelectorStrategyKey] = value + } + } + // Check for ReplicatedJob level exclusive placement. + if topologyDomain, exists := rjob.Template.Annotations[jobset.ExclusiveKey]; exists { + annotations[jobset.ExclusiveKey] = topologyDomain + // Check if we are using nodeSelectorStrategy implementation of exclusive placement at the ReplicatedJob level. + if value, ok := rjob.Template.Annotations[jobset.NodeSelectorStrategyKey]; ok { + annotations[jobset.NodeSelectorStrategyKey] = value + } } obj.SetLabels(labels) diff --git a/pkg/controllers/jobset_controller_test.go b/pkg/controllers/jobset_controller_test.go index b1cf311a..57edb71d 100644 --- a/pkg/controllers/jobset_controller_test.go +++ b/pkg/controllers/jobset_controller_test.go @@ -302,10 +302,104 @@ func TestConstructJobsFromTemplate(t *testing.T) { }, }, { - name: "exclusive affinities", + name: "exclusive placement for a ReplicatedJob", + js: testutils.MakeJobSet(jobSetName, ns). + // Replicated Job A has exclusive placement annotation. + ReplicatedJob(testutils.MakeReplicatedJob(replicatedJobName + "-A"). + Job(testutils.MakeJobTemplate(jobName, ns). + SetAnnotations(map[string]string{jobset.ExclusiveKey: topologyDomain}). + Obj()). + Replicas(1). + Obj()). + // Replicated Job B has no exclusive placement annotation. + ReplicatedJob(testutils.MakeReplicatedJob(replicatedJobName + "-B"). + Job(testutils.MakeJobTemplate(jobName, ns).Obj()). + Replicas(1). + Obj()). + Obj(), + ownedJobs: &childJobs{}, + want: []*batchv1.Job{ + makeJob(&makeJobArgs{ + jobSetName: jobSetName, + replicatedJobName: replicatedJobName + "-A", + jobName: "test-jobset-replicated-job-A-0", + ns: ns, + replicas: 1, + jobIdx: 0, + topology: topologyDomain}). + Suspend(false).Obj(), + makeJob(&makeJobArgs{ + jobSetName: jobSetName, + replicatedJobName: replicatedJobName + "-B", + jobName: "test-jobset-replicated-job-B-0", + ns: ns, + replicas: 1, + jobIdx: 0}). + Suspend(false).Obj(), + }, + }, + { + name: "exclusive placement using nodeSelectorStrategy for a ReplicatedJob", + js: testutils.MakeJobSet(jobSetName, ns). + // Replicated Job A has exclusive placement annotation. + ReplicatedJob(testutils.MakeReplicatedJob(replicatedJobName + "-A"). + Job(testutils.MakeJobTemplate(jobName, ns). + SetAnnotations(map[string]string{ + jobset.ExclusiveKey: topologyDomain, + jobset.NodeSelectorStrategyKey: "true"}). + Obj()). + Replicas(1). + Obj()). + // Replicated Job B has no exclusive placement annotation. + ReplicatedJob(testutils.MakeReplicatedJob(replicatedJobName + "-B"). + Job(testutils.MakeJobTemplate(jobName, ns).Obj()). + Replicas(1). + Obj()). + Obj(), + ownedJobs: &childJobs{}, + want: []*batchv1.Job{ + makeJob(&makeJobArgs{ + jobSetName: jobSetName, + replicatedJobName: replicatedJobName + "-A", + jobName: "test-jobset-replicated-job-A-0", + ns: ns, + replicas: 1, + jobIdx: 0, + topology: topologyDomain, + nodeSelectorStrategy: true}). + Suspend(false). + NodeSelector(map[string]string{ + jobset.NamespacedJobKey: namespacedJobName(ns, "test-jobset-replicated-job-A-0"), + }). + Tolerations([]corev1.Toleration{ + { + Key: jobset.NoScheduleTaintKey, + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoSchedule, + }, + }). + Obj(), + makeJob(&makeJobArgs{ + jobSetName: jobSetName, + replicatedJobName: replicatedJobName + "-B", + jobName: "test-jobset-replicated-job-B-0", + ns: ns, + replicas: 1, + jobIdx: 0}). + Suspend(false).Obj(), + }, + }, + { + name: "exclusive placement for entire JobSet", js: testutils.MakeJobSet(jobSetName, ns). SetAnnotations(map[string]string{jobset.ExclusiveKey: topologyDomain}). - ReplicatedJob(testutils.MakeReplicatedJob(replicatedJobName). + // Replicated Job A has. + ReplicatedJob(testutils.MakeReplicatedJob(replicatedJobName + "-A"). + Job(testutils.MakeJobTemplate(jobName, ns).Obj()). + Replicas(1). + Obj()). + // Replicated Job B. + ReplicatedJob(testutils.MakeReplicatedJob(replicatedJobName + "-B"). Job(testutils.MakeJobTemplate(jobName, ns).Obj()). Replicas(1). Obj()). @@ -314,13 +408,86 @@ func TestConstructJobsFromTemplate(t *testing.T) { want: []*batchv1.Job{ makeJob(&makeJobArgs{ jobSetName: jobSetName, - replicatedJobName: replicatedJobName, - jobName: "test-jobset-replicated-job-0", + replicatedJobName: replicatedJobName + "-A", + jobName: "test-jobset-replicated-job-A-0", ns: ns, replicas: 1, jobIdx: 0, topology: topologyDomain}). Suspend(false).Obj(), + makeJob(&makeJobArgs{ + jobSetName: jobSetName, + replicatedJobName: replicatedJobName + "-B", + jobName: "test-jobset-replicated-job-B-0", + ns: ns, + replicas: 1, + jobIdx: 0, + topology: topologyDomain}). + Suspend(false).Obj(), + }, + }, + { + name: "exclusive placement using nodeSelectorStrategy for entire JobSet", + js: testutils.MakeJobSet(jobSetName, ns). + SetAnnotations(map[string]string{ + jobset.ExclusiveKey: topologyDomain, + jobset.NodeSelectorStrategyKey: "true", + }). + // Replicated Job A has. + ReplicatedJob(testutils.MakeReplicatedJob(replicatedJobName + "-A"). + Job(testutils.MakeJobTemplate(jobName, ns).Obj()). + Replicas(1). + Obj()). + // Replicated Job B. + ReplicatedJob(testutils.MakeReplicatedJob(replicatedJobName + "-B"). + Job(testutils.MakeJobTemplate(jobName, ns).Obj()). + Replicas(1). + Obj()). + Obj(), + ownedJobs: &childJobs{}, + want: []*batchv1.Job{ + makeJob(&makeJobArgs{ + jobSetName: jobSetName, + replicatedJobName: replicatedJobName + "-A", + jobName: "test-jobset-replicated-job-A-0", + ns: ns, + replicas: 1, + jobIdx: 0, + topology: topologyDomain, + nodeSelectorStrategy: true}). + Suspend(false). + NodeSelector(map[string]string{ + jobset.NamespacedJobKey: namespacedJobName(ns, "test-jobset-replicated-job-A-0"), + }). + Tolerations([]corev1.Toleration{ + { + Key: jobset.NoScheduleTaintKey, + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoSchedule, + }, + }). + Obj(), + makeJob(&makeJobArgs{ + jobSetName: jobSetName, + replicatedJobName: replicatedJobName + "-B", + jobName: "test-jobset-replicated-job-B-0", + ns: ns, + replicas: 1, + jobIdx: 0, + topology: topologyDomain, + nodeSelectorStrategy: true}). + Suspend(false). + NodeSelector(map[string]string{ + jobset.NamespacedJobKey: namespacedJobName(ns, "test-jobset-replicated-job-B-0"), + }). + Tolerations([]corev1.Toleration{ + { + Key: jobset.NoScheduleTaintKey, + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoSchedule, + }, + }). + Obj(), }, }, { @@ -400,14 +567,15 @@ func TestConstructJobsFromTemplate(t *testing.T) { { name: "node selector exclusive placement strategy enabled", js: testutils.MakeJobSet(jobSetName, ns). - SetAnnotations(map[string]string{ - jobset.ExclusiveKey: "topology", - jobset.NodeSelectorStrategyKey: "true", - }). EnableDNSHostnames(true). NetworkSubdomain(jobSetName). ReplicatedJob(testutils.MakeReplicatedJob(replicatedJobName). - Job(testutils.MakeJobTemplate(jobName, ns).Obj()). + Job(testutils.MakeJobTemplate(jobName, ns). + SetAnnotations(map[string]string{ + jobset.ExclusiveKey: topologyDomain, + jobset.NodeSelectorStrategyKey: "true", + }). + Obj()). Subdomain(jobSetName). Replicas(1). Obj()). @@ -415,13 +583,14 @@ func TestConstructJobsFromTemplate(t *testing.T) { ownedJobs: &childJobs{}, want: []*batchv1.Job{ makeJob(&makeJobArgs{ - jobSetName: jobSetName, - replicatedJobName: replicatedJobName, - jobName: "test-jobset-replicated-job-0", - ns: ns, - replicas: 1, - jobIdx: 0, - topology: "topology"}). + jobSetName: jobSetName, + replicatedJobName: replicatedJobName, + jobName: "test-jobset-replicated-job-0", + ns: ns, + replicas: 1, + jobIdx: 0, + topology: topologyDomain, + nodeSelectorStrategy: true}). Suspend(false). Subdomain(jobSetName). NodeSelector(map[string]string{ @@ -899,14 +1068,15 @@ func TestCalculateReplicatedJobStatuses(t *testing.T) { } type makeJobArgs struct { - jobSetName string - replicatedJobName string - jobName string - ns string - replicas int - jobIdx int - restarts int - topology string + jobSetName string + replicatedJobName string + jobName string + ns string + replicas int + jobIdx int + restarts int + topology string + nodeSelectorStrategy bool } func makeJob(args *makeJobArgs) *testutils.JobWrapper { @@ -929,6 +1099,10 @@ func makeJob(args *makeJobArgs) *testutils.JobWrapper { // Only set exclusive key if we are using exclusive placement per topology. if args.topology != "" { annotations[jobset.ExclusiveKey] = args.topology + // Exclusive placement topology domain must be set in order to use the node selector strategy. + if args.nodeSelectorStrategy { + annotations[jobset.NodeSelectorStrategyKey] = "true" + } } jobWrapper := testutils.MakeJob(args.jobName, args.ns). JobLabels(labels). diff --git a/pkg/util/testing/wrappers.go b/pkg/util/testing/wrappers.go index 7401de64..a8a3cf70 100644 --- a/pkg/util/testing/wrappers.go +++ b/pkg/util/testing/wrappers.go @@ -173,6 +173,12 @@ func (j *JobTemplateWrapper) PodSpec(podSpec corev1.PodSpec) *JobTemplateWrapper return j } +// SetAnnotations sets the annotations on the Job template. +func (j *JobTemplateWrapper) SetAnnotations(annotations map[string]string) *JobTemplateWrapper { + j.Annotations = annotations + return j +} + // Obj returns the inner batchv1.JobTemplateSpec func (j *JobTemplateWrapper) Obj() batchv1.JobTemplateSpec { return j.JobTemplateSpec