Skip to content

Commit

Permalink
Merge pull request #1069 from epam/kep-168
Browse files Browse the repository at this point in the history
KEP 168: Visibility for cluster queue
  • Loading branch information
k8s-ci-robot authored Sep 8, 2023
2 parents 330505e + 3b1f7c1 commit 61ab18f
Show file tree
Hide file tree
Showing 27 changed files with 1,084 additions and 60 deletions.
25 changes: 25 additions & 0 deletions apis/config/v1beta1/configuration_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ type Configuration struct {
// Integrations provide configuration options for AI/ML/Batch frameworks
// integrations (including K8S job).
Integrations *Integrations `json:"integrations,omitempty"`

// QueueVisibility is configuration to expose the information about the top
// pending workloads.
QueueVisibility *QueueVisibility `json:"queueVisibility,omitempty"`
}

type ControllerManager struct {
Expand Down Expand Up @@ -226,3 +230,24 @@ type Integrations struct {
// - "kubeflow.org/tfjob"
Frameworks []string `json:"frameworks,omitempty"`
}

// QueueVisibility holds the settings that control how information about the
// top pending workloads is exposed.
type QueueVisibility struct {
	// ClusterQueues configures the exposure of the top pending workloads
	// of cluster queues.
	ClusterQueues *ClusterQueueVisibility `json:"clusterQueues,omitempty"`

	// UpdateIntervalSeconds is the interval, in seconds, between updates of
	// the structure of the top pending workloads in the queues.
	// The minimum value is 1.
	// Defaults to 5.
	UpdateIntervalSeconds int32 `json:"updateIntervalSeconds,omitempty"`
}

// ClusterQueueVisibility configures how many pending workloads are exposed in
// the status of a cluster queue.
type ClusterQueueVisibility struct {
	// MaxCount is the upper bound on the number of pending workloads exposed
	// in the cluster queue status. Setting it to 0 disables ClusterQueue
	// visibility updates.
	// The maximal value is 4000.
	// Defaults to 10.
	MaxCount int32 `json:"maxCount,omitempty"`
}
33 changes: 23 additions & 10 deletions apis/config/v1beta1/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,18 @@ import (
)

const (
// NOTE(review): this listing is a rendered diff, so the ten declarations from
// DefaultNamespace through defaultPodsReadyTimeout appear twice below: once as
// the removed lines and once as the re-aligned added lines. Only one copy can
// exist in compilable Go.
DefaultNamespace = "kueue-system"
DefaultWebhookServiceName = "kueue-webhook-service"
DefaultWebhookSecretName = "kueue-webhook-server-cert"
DefaultWebhookPort = 9443
DefaultHealthProbeBindAddress = ":8081"
DefaultMetricsBindAddress = ":8080"
DefaultLeaderElectionID = "c1f6bfd2.kueue.x-k8s.io"
DefaultClientConnectionQPS float32 = 20.0
DefaultClientConnectionBurst int32 = 30
defaultPodsReadyTimeout = 5 * time.Minute
DefaultNamespace = "kueue-system"
DefaultWebhookServiceName = "kueue-webhook-service"
DefaultWebhookSecretName = "kueue-webhook-server-cert"
DefaultWebhookPort = 9443
DefaultHealthProbeBindAddress = ":8081"
DefaultMetricsBindAddress = ":8080"
DefaultLeaderElectionID = "c1f6bfd2.kueue.x-k8s.io"
DefaultClientConnectionQPS float32 = 20.0
DefaultClientConnectionBurst int32 = 30
defaultPodsReadyTimeout = 5 * time.Minute
// DefaultQueueVisibilityUpdateIntervalSeconds is the value that
// SetDefaults_Configuration assigns to QueueVisibility.UpdateIntervalSeconds
// when that field is 0.
DefaultQueueVisibilityUpdateIntervalSeconds int32 = 5
// DefaultClusterQueuesMaxCount is the MaxCount that SetDefaults_Configuration
// uses when QueueVisibility.ClusterQueues is nil.
DefaultClusterQueuesMaxCount int32 = 10
)

func addDefaultingFuncs(scheme *runtime.Scheme) error {
Expand Down Expand Up @@ -116,4 +118,15 @@ func SetDefaults_Configuration(cfg *Configuration) {
if cfg.Integrations.Frameworks == nil {
cfg.Integrations.Frameworks = []string{job.FrameworkName}
}
if cfg.QueueVisibility == nil {
cfg.QueueVisibility = &QueueVisibility{}
}
if cfg.QueueVisibility.UpdateIntervalSeconds == 0 {
cfg.QueueVisibility.UpdateIntervalSeconds = DefaultQueueVisibilityUpdateIntervalSeconds
}
if cfg.QueueVisibility.ClusterQueues == nil {
cfg.QueueVisibility.ClusterQueues = &ClusterQueueVisibility{
MaxCount: DefaultClusterQueuesMaxCount,
}
}
}
51 changes: 49 additions & 2 deletions apis/config/v1beta1/defaults_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ func TestSetDefaults_Configuration(t *testing.T) {
defaultIntegrations := &Integrations{
Frameworks: []string{job.FrameworkName},
}
defaultQueueVisibility := &QueueVisibility{
UpdateIntervalSeconds: DefaultQueueVisibilityUpdateIntervalSeconds,
ClusterQueues: &ClusterQueueVisibility{
MaxCount: 10,
},
}
podsReadyTimeoutTimeout := metav1.Duration{Duration: defaultPodsReadyTimeout}
podsReadyTimeoutOverwrite := metav1.Duration{Duration: time.Minute}

Expand All @@ -76,6 +82,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"defaulting ControllerManager": {
Expand Down Expand Up @@ -111,6 +118,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"should not default ControllerManager": {
Expand All @@ -133,7 +141,8 @@ func TestSetDefaults_Configuration(t *testing.T) {
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
Integrations: defaultIntegrations,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
want: &Configuration{
Namespace: ptr.To(DefaultNamespace),
Expand All @@ -157,6 +166,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"should not set LeaderElectionID": {
Expand Down Expand Up @@ -191,6 +201,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"defaulting InternalCertManagement": {
Expand All @@ -207,6 +218,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"should not default InternalCertManagement": {
Expand All @@ -224,6 +236,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"should not default values in custom ClientConnection": {
Expand All @@ -247,7 +260,8 @@ func TestSetDefaults_Configuration(t *testing.T) {
QPS: ptr.To[float32](123.0),
Burst: ptr.To[int32](456),
},
Integrations: defaultIntegrations,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"should default empty custom ClientConnection": {
Expand All @@ -266,6 +280,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"defaulting waitForPodsReady.timeout": {
Expand All @@ -290,6 +305,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"set waitForPodsReady.blockAdmission to false when enable is false": {
Expand All @@ -314,6 +330,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"respecting provided waitForPodsReady.timeout": {
Expand All @@ -339,6 +356,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"integrations": {
Expand All @@ -360,6 +378,35 @@ func TestSetDefaults_Configuration(t *testing.T) {
Integrations: &Integrations{
Frameworks: []string{"a", "b"},
},
QueueVisibility: defaultQueueVisibility,
},
},
"queue visibility": {
original: &Configuration{
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
QueueVisibility: &QueueVisibility{
UpdateIntervalSeconds: 10,
ClusterQueues: &ClusterQueueVisibility{
MaxCount: 0,
},
},
},
want: &Configuration{
Namespace: ptr.To(DefaultNamespace),
ControllerManager: defaultCtrlManagerConfigurationSpec,
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: &QueueVisibility{
UpdateIntervalSeconds: 10,
ClusterQueues: &ClusterQueueVisibility{
MaxCount: 0,
},
},
},
},
}
Expand Down
40 changes: 40 additions & 0 deletions apis/config/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions apis/kueue/v1beta1/clusterqueue_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,31 @@ type ClusterQueueStatus struct {
// +patchStrategy=merge
// +patchMergeKey=type
Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`

// PendingWorkloadsStatus contains the information exposed about the current
// status of the pending workloads in the cluster queue.
// +optional
PendingWorkloadsStatus *ClusterQueuePendingWorkloadsStatus `json:"pendingWorkloadsStatus"`
}

// ClusterQueuePendingWorkloadsStatus describes the pending workloads of a
// cluster queue as exposed in its status.
type ClusterQueuePendingWorkloadsStatus struct {
	// Head holds the list of top pending workloads.
	// +listType=atomic
	// +optional
	Head []ClusterQueuePendingWorkload `json:"clusterQueuePendingWorkload"`

	// LastChangeTime is the time at which the structure last changed.
	LastChangeTime metav1.Time `json:"lastChangeTime"`
}

// ClusterQueuePendingWorkload identifies a single workload that is pending
// in the cluster queue.
type ClusterQueuePendingWorkload struct {
	// Name is the name of the pending workload.
	Name string `json:"name"`

	// Namespace is the namespace of the pending workload.
	Namespace string `json:"namespace"`
}

type FlavorUsage struct {
Expand Down
41 changes: 41 additions & 0 deletions apis/kueue/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 61ab18f

Please sign in to comment.