From a7a599dc2c1fb796482e3d44f77d1118528e82ac Mon Sep 17 00:00:00 2001 From: Michal Wozniak Date: Mon, 5 Feb 2024 12:23:50 +0100 Subject: [PATCH 1/5] Extend documentation on pod group support --- site/content/en/docs/tasks/run_plain_pods.md | 48 ++++++++++++++++++- .../examples/pods-kueue/kueue-pod-group.yaml | 42 ++++++++++++++++ 2 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 site/static/examples/pods-kueue/kueue-pod-group.yaml diff --git a/site/content/en/docs/tasks/run_plain_pods.md b/site/content/en/docs/tasks/run_plain_pods.md index 81a50c8538..f8819308da 100644 --- a/site/content/en/docs/tasks/run_plain_pods.md +++ b/site/content/en/docs/tasks/run_plain_pods.md @@ -1,9 +1,9 @@ --- -title: "Run A Plain Pod" +title: "Run Plain Pods" date: 2023-09-27 weight: 6 description: > - Run a Kueue scheduled Pod. + Run Jobs represented by plain pods, either single pods, or pod groups. --- This page shows how to leverage Kueue's scheduling and resource management capabilities when running plain Pods. @@ -105,3 +105,47 @@ You can create the Pod using the following command: # Create the pod kubectl apply -f kueue-pod.yaml ``` + +## Pod Group definition + +In order to run a set of pods as a single unit, called Pod Group, add the +"pod-group-name" label, and the "pod-group-total-count" annotation to all +members of the group, consistently: + +```yaml +metadata: + labels: + kueue.x-k8s.io/pod-group-name: "group-name" + annotations: + kueue.x-k8s.io/pod-group-total-count: "2" +``` + +## Feature limitations + +Kueue provides only the minimal required functionallity of running pod groups, +just for the need of environments where the pods are managed by external +controllers directly, without a Job-level CRD. + +As a consequence of this design decision Kueue does not re-implement core +functionalities that are available at the Job-level API, such as advanced retry +policies. In particular, Kueue does not re-create failed pods. 
+ +Note that, this design choice impacts the scenario of +[preemption](/docs/concepts/cluster_queue/#preemption). +When a workload represented by the pod group is preempted all of its pods +are killed by Kueue (by delete requests). However, later, when the workload is +re-admitted, Kueue will not re-create the terminated pods. This task is left to +the user (or the external controller). + +**NOTE:** We recommend migration to using Job-level APIs for managing sets of pods. + +## Example Pod Group + +Here is a sample Pod that just sleeps for a few seconds: + +{{< include "examples/pods-kueue/kueue-pod-group.yaml" "yaml" >}} + +You can create the Pod using the following command: +```sh +kubectl apply -f kueue-pod-group.yaml +``` diff --git a/site/static/examples/pods-kueue/kueue-pod-group.yaml b/site/static/examples/pods-kueue/kueue-pod-group.yaml new file mode 100644 index 0000000000..444e5cfa4d --- /dev/null +++ b/site/static/examples/pods-kueue/kueue-pod-group.yaml @@ -0,0 +1,42 @@ +--- +apiVersion: v1 +kind: Pod +metadata: + generateName: sample-pod- + labels: + kueue.x-k8s.io/queue-name: user-queue + kueue.x-k8s.io/pod-group-name: "sample-group" + annotations: + kueue.x-k8s.io/pod-group-total-count: "2" +spec: + containers: + - name: sleep + image: busybox + command: + - sleep + args: + - 3s + resources: + requests: + cpu: 3 +--- +apiVersion: v1 +kind: Pod +metadata: + generateName: sample-pod- + labels: + kueue.x-k8s.io/queue-name: user-queue + kueue.x-k8s.io/pod-group-name: "sample-group" + annotations: + kueue.x-k8s.io/pod-group-total-count: "2" +spec: + containers: + - name: sleep + image: busybox + command: + - sleep + args: + - 3s + resources: + requests: + cpu: 3 From 317ac6af9cf6c8cf0619ec2cd53eb1911499b2fe Mon Sep 17 00:00:00 2001 From: Michal Wozniak Date: Tue, 6 Feb 2024 13:16:24 +0100 Subject: [PATCH 2/5] Review remarks Co-authored-by: Yuki Iwai Co-authored-by: Patryk Bundyra <73306396+PBundyra@users.noreply.github.com> --- 
site/content/en/docs/tasks/run_plain_pods.md | 52 +++++++++++++------ .../examples/pods-kueue/kueue-pod-group.yaml | 16 +++--- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/site/content/en/docs/tasks/run_plain_pods.md b/site/content/en/docs/tasks/run_plain_pods.md index f8819308da..56b9d829b9 100644 --- a/site/content/en/docs/tasks/run_plain_pods.md +++ b/site/content/en/docs/tasks/run_plain_pods.md @@ -3,10 +3,13 @@ title: "Run Plain Pods" date: 2023-09-27 weight: 6 description: > - Run Jobs represented by plain pods, either single pods, or pod groups. + Run a single Pod, or a group of Pods as a Kueue-managed job. --- -This page shows how to leverage Kueue's scheduling and resource management capabilities when running plain Pods. +This page shows how to leverage Kueue's scheduling and resource management +capabilities when running plain Pods. Kueue supports management of both +[individual Pods](#running-a-single-pod-admitted-by-kueue) and +[Pod groups](#running-a-group-of-pods-to-be-admitted-together). This guide is for [batch users](/docs/tasks#batch-user) that have a basic understanding of Kueue. For more information, see [Kueue's overview](/docs/overview). @@ -60,7 +63,7 @@ This guide is for [batch users](/docs/tasks#batch-user) that have a basic unders 4. Check [Administer cluster quotas](/docs/tasks/administer_cluster_quotas) for details on the initial Kueue setup. 
-## Pod definition +## Running a single Pod admitted by Kueue When running Pods on Kueue, take into consideration the following aspects: @@ -106,7 +109,7 @@ You can create the Pod using the following command: kubectl apply -f kueue-pod.yaml ``` -## Pod Group definition +## Running a group of Pods to be admitted together In order to run a set of pods as a single unit, called Pod Group, add the "pod-group-name" label, and the "pod-group-total-count" annotation to all @@ -122,30 +125,49 @@ metadata: ## Feature limitations -Kueue provides only the minimal required functionallity of running pod groups, +Kueue provides only the minimal required functionality of running pod groups, just for the need of environments where the pods are managed by external controllers directly, without a Job-level CRD. -As a consequence of this design decision Kueue does not re-implement core -functionalities that are available at the Job-level API, such as advanced retry +As a consequence of this design decision, Kueue does not re-implement core +functionalities that are available in the Kubernetes Job API, such as advanced retry policies. In particular, Kueue does not re-create failed pods. -Note that, this design choice impacts the scenario of +This design choice impacts the scenario of [preemption](/docs/concepts/cluster_queue/#preemption). -When a workload represented by the pod group is preempted all of its pods -are killed by Kueue (by delete requests). However, later, when the workload is -re-admitted, Kueue will not re-create the terminated pods. This task is left to -the user (or the external controller). +When a Kueue needs to preempt a workload that represents a pod group, kueue sends +delete requests for all of the pods in the group. It is the responsibility of the +user or controller that created the original pods to create replacement Pods. -**NOTE:** We recommend migration to using Job-level APIs for managing sets of pods. 
+**NOTE:** We recommend using the kubernetes Job API or similar CRDs such as +JobSet, MPIJob, RayJob, etc. + +## Termination + +Kueue considers a Pod group as successful, and marks the associated Workload as +finished, when the number of succeeded pods equals the pod group size. + +If a Pod group is not successful, there are two ways you may want to use to +terminate execution of a Pod group to free the reserved resources: +1. Issue a Delete request for the Workload object. Kueue will terminate all + remaining pods. +2. Set the `kueue.x-k8s.io/retriable-in-group: false` annotation on at least + one pod in the group (can be a replacement pod). Kueue will mark the workload + as finished once all pods are terminated. ## Example Pod Group -Here is a sample Pod that just sleeps for a few seconds: +Here is a sample Pod Group that just sleeps for a few seconds: {{< include "examples/pods-kueue/kueue-pod-group.yaml" "yaml" >}} -You can create the Pod using the following command: +You can create the Pod Group using the following command: ```sh kubectl apply -f kueue-pod-group.yaml ``` + +The name of the associated Workload created by Kueue equals the name of the Pod +group. 
In this example it is `sample-group`; you can inspect the workload using: +```sh +kubectl describe workload/sample-group +``` diff --git a/site/static/examples/pods-kueue/kueue-pod-group.yaml b/site/static/examples/pods-kueue/kueue-pod-group.yaml index 444e5cfa4d..b3482108b6 100644 --- a/site/static/examples/pods-kueue/kueue-pod-group.yaml +++ b/site/static/examples/pods-kueue/kueue-pod-group.yaml @@ -2,20 +2,18 @@ apiVersion: v1 kind: Pod metadata: - generateName: sample-pod- + generateName: sample-leader- labels: kueue.x-k8s.io/queue-name: user-queue kueue.x-k8s.io/pod-group-name: "sample-group" annotations: kueue.x-k8s.io/pod-group-total-count: "2" spec: + restartPolicy: Never containers: - name: sleep image: busybox - command: - - sleep - args: - - 3s + command: ["sh", "-c", 'echo "hello world from the leader pod" && sleep 3'] resources: requests: cpu: 3 @@ -23,20 +21,18 @@ spec: apiVersion: v1 kind: Pod metadata: - generateName: sample-pod- + generateName: sample-worker- labels: kueue.x-k8s.io/queue-name: user-queue kueue.x-k8s.io/pod-group-name: "sample-group" annotations: kueue.x-k8s.io/pod-group-total-count: "2" spec: + restartPolicy: Never containers: - name: sleep image: busybox - command: - - sleep - args: - - 3s + command: ["sh", "-c", 'echo "hello world from the worker pod" && sleep 2'] resources: requests: cpu: 3 From 54627b860558838028d750ae174ffd5619ec417d Mon Sep 17 00:00:00 2001 From: Michal Wozniak Date: Thu, 8 Feb 2024 11:36:20 +0100 Subject: [PATCH 3/5] Align casing of pods and pod groups for consistency --- site/content/en/docs/tasks/run_plain_pods.md | 28 ++++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/site/content/en/docs/tasks/run_plain_pods.md b/site/content/en/docs/tasks/run_plain_pods.md index 56b9d829b9..93d6397ca3 100644 --- a/site/content/en/docs/tasks/run_plain_pods.md +++ b/site/content/en/docs/tasks/run_plain_pods.md @@ -111,7 +111,7 @@ kubectl apply -f kueue-pod.yaml ## Running a group of Pods 
to be admitted together -In order to run a set of pods as a single unit, called Pod Group, add the +In order to run a set of Pods as a single unit, called Pod group, add the "pod-group-name" label, and the "pod-group-total-count" annotation to all members of the group, consistently: @@ -125,19 +125,19 @@ metadata: ## Feature limitations -Kueue provides only the minimal required functionality of running pod groups, -just for the need of environments where the pods are managed by external +Kueue provides only the minimal required functionality of running Pod groups, +just for the need of environments where the Pods are managed by external controllers directly, without a Job-level CRD. As a consequence of this design decision, Kueue does not re-implement core functionalities that are available in the Kubernetes Job API, such as advanced retry -policies. In particular, Kueue does not re-create failed pods. +policies. In particular, Kueue does not re-create failed Pods. This design choice impacts the scenario of [preemption](/docs/concepts/cluster_queue/#preemption). -When a Kueue needs to preempt a workload that represents a pod group, kueue sends -delete requests for all of the pods in the group. It is the responsibility of the -user or controller that created the original pods to create replacement Pods. +When Kueue needs to preempt a workload that represents a Pod group, Kueue sends +delete requests for all of the Pods in the group. It is the responsibility of the +user or controller that created the original Pods to create replacement Pods. **NOTE:** We recommend using the kubernetes Job API or similar CRDs such as JobSet, MPIJob, RayJob, etc. @@ -145,23 +145,23 @@ JobSet, MPIJob, RayJob, etc. ## Termination Kueue considers a Pod group as successful, and marks the associated Workload as -finished, when the number of succeeded Pods equals the pod group size. +finished, when the number of succeeded Pods equals the Pod group size. 
If a Pod group is not successful, there are two ways you may want to use to terminate execution of a Pod group to free the reserved resources: 1. Issue a Delete request for the Workload object. Kueue will terminate all - remaining pods. + remaining Pods. 2. Set the `kueue.x-k8s.io/retriable-in-group: false` annotation on at least - one pod in the group (can be a replacement pod). Kueue will mark the workload - as finished once all pods are terminated. + one Pod in the group (can be a replacement Pod). Kueue will mark the workload + as finished once all Pods are terminated. -## Example Pod Group +## Example Pod group -Here is a sample Pod Group that just sleeps for a few seconds: +Here is a sample Pod group that just sleeps for a few seconds: {{< include "examples/pods-kueue/kueue-pod-group.yaml" "yaml" >}} -You can create the Pod Group using the following command: +You can create the Pod group using the following command: ```sh kubectl apply -f kueue-pod-group.yaml ``` From 7d302e73018c6203c9d413a8f292de9fc287d545 Mon Sep 17 00:00:00 2001 From: Michal Wozniak Date: Thu, 8 Feb 2024 12:52:29 +0100 Subject: [PATCH 4/5] review remarks --- site/content/en/docs/tasks/run_plain_pods.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/content/en/docs/tasks/run_plain_pods.md b/site/content/en/docs/tasks/run_plain_pods.md index 93d6397ca3..267521309f 100644 --- a/site/content/en/docs/tasks/run_plain_pods.md +++ b/site/content/en/docs/tasks/run_plain_pods.md @@ -140,7 +140,7 @@ delete requests for all of the Pods in the group. It is the responsibility of th user or controller that created the original Pods to create replacement Pods. **NOTE:** We recommend using the kubernetes Job API or similar CRDs such as -JobSet, MPIJob, RayJob, etc. +JobSet, MPIJob, RayJob (see more [here](/docs/tasks/#batch-user)). 
## Termination From aa9fb104582ad0b63eaa99176bb45adaf6765b45 Mon Sep 17 00:00:00 2001 From: Michal Wozniak Date: Fri, 9 Feb 2024 10:54:21 +0100 Subject: [PATCH 5/5] Fix indentation --- site/content/en/docs/tasks/run_plain_pods.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/site/content/en/docs/tasks/run_plain_pods.md b/site/content/en/docs/tasks/run_plain_pods.md index 267521309f..0bde887cc2 100644 --- a/site/content/en/docs/tasks/run_plain_pods.md +++ b/site/content/en/docs/tasks/run_plain_pods.md @@ -123,7 +123,7 @@ metadata: kueue.x-k8s.io/pod-group-total-count: "2" ``` -## Feature limitations +### Feature limitations Kueue provides only the minimal required functionality of running Pod groups, just for the need of environments where the Pods are managed by external @@ -142,7 +142,7 @@ user or controller that created the original Pods to create replacement Pods. **NOTE:** We recommend using the kubernetes Job API or similar CRDs such as JobSet, MPIJob, RayJob (see more [here](/docs/tasks/#batch-user)). -## Termination +### Termination Kueue considers a Pod group as successful, and marks the associated Workload as finished, when the number of succeeded Pods equals the Pod group size. @@ -155,7 +155,7 @@ terminate execution of a Pod group to free the reserved resources: one Pod in the group (can be a replacement Pod). Kueue will mark the workload as finished once all Pods are terminated. -## Example Pod group +### Example Pod group Here is a sample Pod group that just sleeps for a few seconds: