Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

KEP 168: Visibility for cluster queue #1069

Merged
merged 26 commits into from
Sep 8, 2023
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions apis/config/v1beta1/configuration_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ type Configuration struct {
// Integrations provide configuration options for AI/ML/Batch frameworks
// integrations (including K8S job).
Integrations *Integrations `json:"integrations,omitempty"`

// QueueVisibility is configuration to expose the information about the top
// pending workloads.
QueueVisibility *QueueVisibility `json:"queueVisibility,omitempty"`
}

type ControllerManager struct {
Expand Down Expand Up @@ -226,3 +230,24 @@ type Integrations struct {
// - "kubeflow.org/tfjob"
Frameworks []string `json:"frameworks,omitempty"`
}

// QueueVisibility is the configuration used to expose information about the
// top pending workloads.
type QueueVisibility struct {
// ClusterQueues is configuration to expose the information
// about the top pending workloads in the cluster queue.
ClusterQueues *ClusterQueueVisibility `json:"clusterQueues,omitempty"`

// UpdateIntervalSeconds specifies the time interval, in seconds, between
// updates to the structure of the top pending workloads in the queues.
// The minimum value is 1.
// Defaults to 5; a value of 0 (unset) is replaced by the default.
UpdateIntervalSeconds int32 `json:"updateIntervalSeconds,omitempty"`
}

// ClusterQueueVisibility is the configuration that controls how many pending
// workloads are exposed in the cluster queue status.
type ClusterQueueVisibility struct {
// MaxCount indicates the maximum number of pending workloads exposed in the
// cluster queue status. When the value is set to 0, then ClusterQueue
// visibility updates are disabled.
// The maximum value is 4000.
// Defaults to 10. The default is applied only when the enclosing
// ClusterQueues section is omitted; an explicitly configured 0 is kept.
MaxCount int32 `json:"maxCount,omitempty"`
}
33 changes: 23 additions & 10 deletions apis/config/v1beta1/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,18 @@ import (
)

const (
DefaultNamespace = "kueue-system"
DefaultWebhookServiceName = "kueue-webhook-service"
DefaultWebhookSecretName = "kueue-webhook-server-cert"
DefaultWebhookPort = 9443
DefaultHealthProbeBindAddress = ":8081"
DefaultMetricsBindAddress = ":8080"
DefaultLeaderElectionID = "c1f6bfd2.kueue.x-k8s.io"
DefaultClientConnectionQPS float32 = 20.0
DefaultClientConnectionBurst int32 = 30
defaultPodsReadyTimeout = 5 * time.Minute
DefaultNamespace = "kueue-system"
DefaultWebhookServiceName = "kueue-webhook-service"
DefaultWebhookSecretName = "kueue-webhook-server-cert"
DefaultWebhookPort = 9443
DefaultHealthProbeBindAddress = ":8081"
DefaultMetricsBindAddress = ":8080"
DefaultLeaderElectionID = "c1f6bfd2.kueue.x-k8s.io"
DefaultClientConnectionQPS float32 = 20.0
DefaultClientConnectionBurst int32 = 30
defaultPodsReadyTimeout = 5 * time.Minute
DefaultQueueVisibilityUpdateIntervalSeconds int32 = 5
DefaultClusterQueuesMaxCount int32 = 10
)

func addDefaultingFuncs(scheme *runtime.Scheme) error {
Expand Down Expand Up @@ -116,4 +118,15 @@ func SetDefaults_Configuration(cfg *Configuration) {
if cfg.Integrations.Frameworks == nil {
cfg.Integrations.Frameworks = []string{job.FrameworkName}
}
if cfg.QueueVisibility == nil {
cfg.QueueVisibility = &QueueVisibility{}
}
if cfg.QueueVisibility.UpdateIntervalSeconds == 0 {
cfg.QueueVisibility.UpdateIntervalSeconds = DefaultQueueVisibilityUpdateIntervalSeconds
}
if cfg.QueueVisibility.ClusterQueues == nil {
cfg.QueueVisibility.ClusterQueues = &ClusterQueueVisibility{
MaxCount: DefaultClusterQueuesMaxCount,
}
}
}
51 changes: 49 additions & 2 deletions apis/config/v1beta1/defaults_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ func TestSetDefaults_Configuration(t *testing.T) {
defaultIntegrations := &Integrations{
Frameworks: []string{job.FrameworkName},
}
defaultQueueVisibility := &QueueVisibility{
UpdateIntervalSeconds: DefaultQueueVisibilityUpdateIntervalSeconds,
ClusterQueues: &ClusterQueueVisibility{
MaxCount: 10,
},
}
podsReadyTimeoutTimeout := metav1.Duration{Duration: defaultPodsReadyTimeout}
podsReadyTimeoutOverwrite := metav1.Duration{Duration: time.Minute}

Expand All @@ -76,6 +82,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"defaulting ControllerManager": {
Expand Down Expand Up @@ -111,6 +118,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"should not default ControllerManager": {
Expand All @@ -133,7 +141,8 @@ func TestSetDefaults_Configuration(t *testing.T) {
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
Integrations: defaultIntegrations,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
want: &Configuration{
Namespace: ptr.To(DefaultNamespace),
Expand All @@ -157,6 +166,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"should not set LeaderElectionID": {
Expand Down Expand Up @@ -191,6 +201,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"defaulting InternalCertManagement": {
Expand All @@ -207,6 +218,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"should not default InternalCertManagement": {
Expand All @@ -224,6 +236,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"should not default values in custom ClientConnection": {
Expand All @@ -247,7 +260,8 @@ func TestSetDefaults_Configuration(t *testing.T) {
QPS: ptr.To[float32](123.0),
Burst: ptr.To[int32](456),
},
Integrations: defaultIntegrations,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"should default empty custom ClientConnection": {
Expand All @@ -266,6 +280,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"defaulting waitForPodsReady.timeout": {
Expand All @@ -290,6 +305,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"set waitForPodsReady.blockAdmission to false when enable is false": {
Expand All @@ -314,6 +330,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"respecting provided waitForPodsReady.timeout": {
Expand All @@ -339,6 +356,7 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
},
},
"integrations": {
Expand All @@ -360,6 +378,35 @@ func TestSetDefaults_Configuration(t *testing.T) {
Integrations: &Integrations{
Frameworks: []string{"a", "b"},
},
QueueVisibility: defaultQueueVisibility,
},
},
"queue visibility": {
original: &Configuration{
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
QueueVisibility: &QueueVisibility{
UpdateIntervalSeconds: 10,
ClusterQueues: &ClusterQueueVisibility{
MaxCount: 0,
},
},
},
want: &Configuration{
Namespace: ptr.To(DefaultNamespace),
ControllerManager: defaultCtrlManagerConfigurationSpec,
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: &QueueVisibility{
UpdateIntervalSeconds: 10,
ClusterQueues: &ClusterQueueVisibility{
MaxCount: 0,
},
},
},
},
}
Expand Down
40 changes: 40 additions & 0 deletions apis/config/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions apis/kueue/v1beta1/clusterqueue_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,31 @@ type ClusterQueueStatus struct {
// +patchStrategy=merge
// +patchMergeKey=type
Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`

// PendingWorkloadsStatus contains the information exposed about the current
// status of the pending workloads in the cluster queue.
// +optional
PendingWorkloadsStatus *ClusterQueuePendingWorkloadsStatus `json:"pendingWorkloadsStatus"`
}

// ClusterQueuePendingWorkloadsStatus contains the information exposed about
// the pending workloads in the cluster queue.
type ClusterQueuePendingWorkloadsStatus struct {
// NOTE(review): Head is serialized under the JSON key
// "clusterQueuePendingWorkload" and is marked +optional although its tag
// has no omitempty - confirm both are intended before the API is released.

// Head contains the list of top pending workloads.
// +listType=atomic
// +optional
Head []ClusterQueuePendingWorkload `json:"clusterQueuePendingWorkload"`

// LastChangeTime indicates the time of the last change of the structure.
LastChangeTime metav1.Time `json:"lastChangeTime"`
}

// ClusterQueuePendingWorkload contains the information identifying a pending workload
// in the cluster queue.
type ClusterQueuePendingWorkload struct {
// Name indicates the name of the pending workload.
Name string `json:"name"`

// Namespace indicates the namespace of the pending workload.
Namespace string `json:"namespace"`
}

type FlavorUsage struct {
Expand Down
41 changes: 41 additions & 0 deletions apis/kueue/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading