Skip to content

Commit

Permalink
Fast admission annotation
Browse files Browse the repository at this point in the history
  • Loading branch information
vladikkuzn committed Oct 3, 2024
1 parent d00703b commit d098d50
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 8 deletions.
32 changes: 31 additions & 1 deletion pkg/controller/jobs/pod/pod_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -667,9 +667,38 @@ func (p *Pod) Load(ctx context.Context, c client.Client, key *types.NamespacedNa
}

// constructGroupPodSets builds the pod sets describing the pod group that p
// belongs to.
//
// If the pod carries GroupFastAdmissionAnnotation (only the key's presence is
// checked, its value is ignored), the pod sets are derived from the declared
// group total count via constructGroupPodSetsFast. Otherwise they are computed
// from the pods currently held in p.list.
func (p *Pod) constructGroupPodSets() ([]kueue.PodSet, error) {
	_, fast := p.pod.GetAnnotations()[GroupFastAdmissionAnnotation]
	if !fast {
		// Regular path: derive the pod sets from the listed pods.
		return constructGroupPodSets(p.list.Items)
	}

	total, err := p.groupTotalCount()
	if err != nil {
		return nil, err
	}
	return constructGroupPodSetsFast(p, total)
}

// constructGroupPodSetsFast builds a single pod set for the whole group from
// the first pod in p.list that is runnable or succeeded.
//
// The pod set is taken from that pod, named after its role hash, and its Count
// is set to groupTotalCount, so the result does not depend on how many pods of
// the group are present in the list.
//
// It returns an error if the role hash cannot be computed or if the list
// contains no runnable (or succeeded) pod.
func constructGroupPodSetsFast(p *Pod, groupTotalCount int) ([]kueue.PodSet, error) {
	for _, podInGroup := range p.list.Items {
		if !isPodRunnableOrSucceeded(&podInGroup) {
			continue
		}

		roleHash, err := getRoleHash(podInGroup)
		if err != nil {
			return nil, fmt.Errorf("failed to calculate pod role hash: %w", err)
		}

		podSet := FromObject(&podInGroup).PodSets()
		podSet[0].Name = roleHash
		podSet[0].Count = int32(groupTotalCount)

		// Build the one-element result directly. Indexing a length-1 slice by
		// the pod's position in the list would panic whenever the first
		// runnable pod is not the first item.
		return []kueue.PodSet{podSet[0]}, nil
	}

	return nil, errors.New("failed to find a runnable pod in the group")
}

func constructGroupPodSets(pods []corev1.Pod) ([]kueue.PodSet, error) {
var resultPodSets []kueue.PodSet

Expand Down Expand Up @@ -713,8 +742,9 @@ func (p *Pod) validatePodGroupMetadata(r record.EventRecorder, activePods []core
return err
}
originalQueue := jobframework.QueueName(p)
_, useFastAdmission := p.pod.GetAnnotations()[GroupFastAdmissionAnnotation]

if len(activePods) < groupTotalCount {
if !useFastAdmission && len(activePods) < groupTotalCount {
errMsg := fmt.Sprintf("'%s' group has fewer runnable pods than expected", podGroupName(p.pod))
r.Eventf(p.Object(), corev1.EventTypeWarning, jobframework.ReasonErrWorkloadCompose, errMsg)
return jobframework.UnretryableError(errMsg)
Expand Down
15 changes: 8 additions & 7 deletions pkg/controller/jobs/pod/pod_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,14 @@ import (
)

const (
ManagedLabelKey = constants.ManagedByKueueLabel
ManagedLabelValue = "true"
PodFinalizer = ManagedLabelKey
GroupNameLabel = "kueue.x-k8s.io/pod-group-name"
GroupTotalCountAnnotation = "kueue.x-k8s.io/pod-group-total-count"
RoleHashAnnotation = "kueue.x-k8s.io/role-hash"
RetriableInGroupAnnotation = "kueue.x-k8s.io/retriable-in-group"
ManagedLabelKey = constants.ManagedByKueueLabel
ManagedLabelValue = "true"
PodFinalizer = ManagedLabelKey
GroupNameLabel = "kueue.x-k8s.io/pod-group-name"
GroupTotalCountAnnotation = "kueue.x-k8s.io/pod-group-total-count"
GroupFastAdmissionAnnotation = "kueue.x-k8s.io/pod-group-fast-admission"
RoleHashAnnotation = "kueue.x-k8s.io/role-hash"
RetriableInGroupAnnotation = "kueue.x-k8s.io/retriable-in-group"
)

var (
Expand Down

0 comments on commit d098d50

Please sign in to comment.