diff --git a/pkg/controllers/jobset_controller.go b/pkg/controllers/jobset_controller.go index 497d40aa..8533d8c7 100644 --- a/pkg/controllers/jobset_controller.go +++ b/pkg/controllers/jobset_controller.go @@ -730,7 +730,7 @@ func labelAndAnnotateObject(obj metav1.Object, js *jobset.JobSet, rjob *jobset.R labels[jobset.ReplicatedJobReplicas] = strconv.Itoa(int(rjob.Replicas)) labels[jobset.JobIndexKey] = strconv.Itoa(jobIdx) labels[jobset.JobKey] = jobHashKey(js.Namespace, jobName) - labels[jobset.JobIDKey] = calculateJobID(js, rjob, jobIdx) + labels[jobset.JobIDKey] = calculateJobID(js, rjob.Name, jobIdx) // Set annotations on the object. annotations := collections.CloneMap(obj.GetAnnotations()) @@ -740,7 +740,7 @@ func labelAndAnnotateObject(obj metav1.Object, js *jobset.JobSet, rjob *jobset.R annotations[jobset.ReplicatedJobReplicas] = strconv.Itoa(int(rjob.Replicas)) annotations[jobset.JobIndexKey] = strconv.Itoa(jobIdx) annotations[jobset.JobKey] = jobHashKey(js.Namespace, jobName) - annotations[jobset.JobIDKey] = calculateJobID(js, rjob, jobIdx) + annotations[jobset.JobIDKey] = calculateJobID(js, rjob.Name, jobIdx) // Apply coordinator annotation/label if a coordinator is defined in the JobSet spec. if js.Spec.Coordinator != nil { @@ -1035,16 +1035,28 @@ func coordinatorEndpoint(js *jobset.JobSet) string { return fmt.Sprintf("%s-%s-%d-%d.%s", js.Name, js.Spec.Coordinator.ReplicatedJob, js.Spec.Coordinator.JobIndex, js.Spec.Coordinator.PodIndex, GetSubdomain(js)) } -// calculateJobID deterministically assigns a unique integer Job ID for a particular -// job in a jobset. The job index `j` for replicatedJob[i] is calculated as the sum -// of all replicatedJob[k].replicas for k in range 0 to i-1 inclusive, plus `j`. -// This works because the replicatedJobs order is immutable. +// calculateJobID determines the job ID for a given job. The job ID is a unique +// global index for the job, with values ranging from 0 to N-1, +// where N=total number of jobs in the jobset. The job ID is calculated by +// iterating through the replicatedJobs in the order, as defined in the JobSet +// spec, keeping a cumulative sum of total replicas seen so far, then when we +// arrive at the parent replicatedJob of the target job, we add the local job +// index to our running sum of total jobs seen so far, in order to arrive at +// the final job ID value. +// +// Below is a diagram illustrating how job IDs differ from job indexes. +// +// | my-jobset | +// | replicated job A | replicated job B | +// | job ID 0 | jobID 1 | jobID 2 | jobID 3 | +// | job index 0 | job index 1 | job index 0 | job index 1 | +// // Returns an empty string if the parent replicated Job does not exist, // although this should never happen in practice. -func calculateJobID(js *jobset.JobSet, parentReplicatedJob *jobset.ReplicatedJob, jobIdx int) string { +func calculateJobID(js *jobset.JobSet, replicatedJobName string, jobIdx int) string { currTotalJobs := 0 for _, rjob := range js.Spec.ReplicatedJobs { - if rjob.Name == parentReplicatedJob.Name { + if rjob.Name == replicatedJobName { return strconv.Itoa(currTotalJobs + jobIdx) } currTotalJobs += int(rjob.Replicas) diff --git a/pkg/controllers/jobset_controller_test.go b/pkg/controllers/jobset_controller_test.go index 5cb3506c..e26eeb0b 100644 --- a/pkg/controllers/jobset_controller_test.go +++ b/pkg/controllers/jobset_controller_test.go @@ -686,6 +686,15 @@ func TestConstructJobsFromTemplate(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { + // Here we update the expected Jobs with certain features which require + // direct access to the JobSet object itself to calculate. For example, + // the `jobset.sigs.k8s.io/job-id` annotation requires access to the + // full JobSet spec to calculate a unique ID for each Job. + for _, expectedJob := range tc.want { + addJobID(t, tc.js, expectedJob) + } + + // Now get the actual output of constructJobsFromTemplate, and diff the results. var got []*batchv1.Job for _, rjob := range tc.js.Spec.ReplicatedJobs { jobs := constructJobsFromTemplate(tc.js, &rjob, tc.ownedJobs) @@ -699,6 +708,26 @@ func TestConstructJobsFromTemplate(t *testing.T) { } } +// addJobID modifies the Job object in place by adding +// the `jobset.sigs.k8s.io/job-id` label/annotation to both the +// Job itself and the Job template spec.` +func addJobID(t *testing.T, js *jobset.JobSet, job *batchv1.Job) { + t.Helper() + + rjobName := job.Annotations[jobset.ReplicatedJobNameKey] + jobIdx, err := strconv.Atoi(job.Annotations[jobset.JobIndexKey]) + if err != nil { + t.Fatalf("invalid test case: %v", err) + } + // Job label/annotation + job.Labels[jobset.JobIDKey] = calculateJobID(js, rjobName, jobIdx) + job.Annotations[jobset.JobIDKey] = calculateJobID(js, rjobName, jobIdx) + + // Job template spec label/annotation + job.Spec.Template.Labels[jobset.JobIDKey] = calculateJobID(js, rjobName, jobIdx) + job.Spec.Template.Annotations[jobset.JobIDKey] = calculateJobID(js, rjobName, jobIdx) +} + func TestUpdateConditions(t *testing.T) { var ( jobSetName = "test-jobset" @@ -1384,11 +1413,11 @@ func TestCreateHeadlessSvcIfNecessary(t *testing.T) { func TestCalculateJobID(t *testing.T) { tests := []struct { - name string - jobSet *jobset.JobSet - parentReplicatedJob *jobset.ReplicatedJob - jobIdx int - expectedJobID string + name string + jobSet *jobset.JobSet + replicatedJob string + jobIdx int + expectedJobID string }{ { name: "single replicated job", @@ -1399,9 +1428,9 @@ func TestCalculateJobID(t *testing.T) { }, }, }, - parentReplicatedJob: &jobset.ReplicatedJob{Name: "rjob"}, - jobIdx: 1, - expectedJobID: "1", + replicatedJob: "rjob", + jobIdx: 1, + expectedJobID: "1", }, { name: "multiple replicated jobs", @@ -1414,9 +1443,9 @@ func TestCalculateJobID(t *testing.T) { }, }, }, - parentReplicatedJob: &jobset.ReplicatedJob{Name: "rjob2"}, - jobIdx: 3, - expectedJobID: "5", + replicatedJob: "rjob2", + jobIdx: 3, + expectedJobID: "5", }, { name: "replicated job not found", @@ -1427,15 +1456,15 @@ func TestCalculateJobID(t *testing.T) { }, }, }, - parentReplicatedJob: &jobset.ReplicatedJob{Name: "rjob2"}, - jobIdx: 0, - expectedJobID: "", + replicatedJob: "rjob2", + jobIdx: 0, + expectedJobID: "", }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - actualJobID := calculateJobID(tc.jobSet, tc.parentReplicatedJob, tc.jobIdx) + actualJobID := calculateJobID(tc.jobSet, tc.replicatedJob, tc.jobIdx) if diff := cmp.Diff(tc.expectedJobID, actualJobID); diff != "" { t.Errorf("unexpected job ID (-want/+got): %s", diff) }