diff --git a/docs/reference/api.md b/docs/reference/api.md index 9a0f75e18e9..d539fdc1f07 100644 --- a/docs/reference/api.md +++ b/docs/reference/api.md @@ -276,6 +276,7 @@ _Appears in:_ | `replicas` _integer_ | Replicas is the number of desired Pods for this worker group. See https://github.com/ray-project/kuberay/pull/1443 for more details about the reason for making this field optional. | 0 | | | `minReplicas` _integer_ | MinReplicas denotes the minimum number of desired Pods for this worker group. | 0 | | | `maxReplicas` _integer_ | MaxReplicas denotes the maximum number of desired Pods for this worker group, and the default value is maxInt32. | 2147483647 | | +| `idleTimeoutSeconds` _integer_ | IdleTimeoutSeconds denotes the number of seconds to wait before the v2 autoscaler terminates an idle worker pod of this type.<br />This value is only used with the Ray Autoscaler enabled and defaults to the value set by the AutoscalingConfig if not specified for this worker group. | | | | `rayStartParams` _object (keys:string, values:string)_ | RayStartParams are the params of the start command: address, object-store-memory, ... | | | | `template` _[PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#podtemplatespec-v1-core)_ | Template is a pod template for the worker | | | | `scaleStrategy` _[ScaleStrategy](#scalestrategy)_ | ScaleStrategy defines which pods to remove | | | diff --git a/helm-chart/kuberay-operator/crds/ray.io_rayclusters.yaml b/helm-chart/kuberay-operator/crds/ray.io_rayclusters.yaml index 8174d48e73f..a466e41e627 100644 --- a/helm-chart/kuberay-operator/crds/ray.io_rayclusters.yaml +++ b/helm-chart/kuberay-operator/crds/ray.io_rayclusters.yaml @@ -4116,6 +4116,9 @@ spec: properties: groupName: type: string + idleTimeoutSeconds: + format: int32 + type: integer maxReplicas: default: 2147483647 format: int32 diff --git a/helm-chart/kuberay-operator/crds/ray.io_rayjobs.yaml b/helm-chart/kuberay-operator/crds/ray.io_rayjobs.yaml index f9b06f7a321..d89f4761ad8 100644 --- a/helm-chart/kuberay-operator/crds/ray.io_rayjobs.yaml +++ b/helm-chart/kuberay-operator/crds/ray.io_rayjobs.yaml @@ -4128,6 +4128,9 @@ spec: properties: groupName: type: string + idleTimeoutSeconds: + format: int32 + type: integer maxReplicas: default: 2147483647 format: int32 diff --git a/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml b/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml index 4a4545280af..09cf34e65bb 100644 --- a/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml +++ b/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml @@ -4094,6 +4094,9 @@ spec: properties: groupName: type: string + idleTimeoutSeconds: + format: int32 + type: integer maxReplicas: default: 2147483647 format: int32 diff --git 
a/ray-operator/apis/ray/v1/raycluster_types.go b/ray-operator/apis/ray/v1/raycluster_types.go index 20e69c399a1..2e21d24b3a7 100644 --- a/ray-operator/apis/ray/v1/raycluster_types.go +++ b/ray-operator/apis/ray/v1/raycluster_types.go @@ -56,6 +56,9 @@ type WorkerGroupSpec struct { // MaxReplicas denotes the maximum number of desired Pods for this worker group, and the default value is maxInt32. // +kubebuilder:default:=2147483647 MaxReplicas *int32 `json:"maxReplicas"` + // IdleTimeoutSeconds denotes the number of seconds to wait before the v2 autoscaler terminates an idle worker pod of this type. + // This value is only used with the Ray Autoscaler enabled and defaults to the value set by the AutoscalingConfig if not specified for this worker group. + IdleTimeoutSeconds *int32 `json:"idleTimeoutSeconds,omitempty"` // RayStartParams are the params of the start command: address, object-store-memory, ... RayStartParams map[string]string `json:"rayStartParams"` // Template is a pod template for the worker diff --git a/ray-operator/apis/ray/v1/zz_generated.deepcopy.go b/ray-operator/apis/ray/v1/zz_generated.deepcopy.go index 0756e855458..d3c086ce6ba 100644 --- a/ray-operator/apis/ray/v1/zz_generated.deepcopy.go +++ b/ray-operator/apis/ray/v1/zz_generated.deepcopy.go @@ -657,6 +657,11 @@ func (in *WorkerGroupSpec) DeepCopyInto(out *WorkerGroupSpec) { *out = new(int32) **out = **in } + if in.IdleTimeoutSeconds != nil { + in, out := &in.IdleTimeoutSeconds, &out.IdleTimeoutSeconds + *out = new(int32) + **out = **in + } if in.RayStartParams != nil { in, out := &in.RayStartParams, &out.RayStartParams *out = make(map[string]string, len(*in)) diff --git a/ray-operator/config/crd/bases/ray.io_rayclusters.yaml b/ray-operator/config/crd/bases/ray.io_rayclusters.yaml index 8174d48e73f..a466e41e627 100644 --- a/ray-operator/config/crd/bases/ray.io_rayclusters.yaml +++ b/ray-operator/config/crd/bases/ray.io_rayclusters.yaml @@ -4116,6 +4116,9 @@ spec: properties: groupName: type: 
string + idleTimeoutSeconds: + format: int32 + type: integer maxReplicas: default: 2147483647 format: int32 diff --git a/ray-operator/config/crd/bases/ray.io_rayjobs.yaml b/ray-operator/config/crd/bases/ray.io_rayjobs.yaml index f9b06f7a321..d89f4761ad8 100644 --- a/ray-operator/config/crd/bases/ray.io_rayjobs.yaml +++ b/ray-operator/config/crd/bases/ray.io_rayjobs.yaml @@ -4128,6 +4128,9 @@ spec: properties: groupName: type: string + idleTimeoutSeconds: + format: int32 + type: integer maxReplicas: default: 2147483647 format: int32 diff --git a/ray-operator/config/crd/bases/ray.io_rayservices.yaml b/ray-operator/config/crd/bases/ray.io_rayservices.yaml index 4a4545280af..09cf34e65bb 100644 --- a/ray-operator/config/crd/bases/ray.io_rayservices.yaml +++ b/ray-operator/config/crd/bases/ray.io_rayservices.yaml @@ -4094,6 +4094,9 @@ spec: properties: groupName: type: string + idleTimeoutSeconds: + format: int32 + type: integer maxReplicas: default: 2147483647 format: int32 diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/workergroupspec.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/workergroupspec.go index b31254d4b92..7d35b865872 100644 --- a/ray-operator/pkg/client/applyconfiguration/ray/v1/workergroupspec.go +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/workergroupspec.go @@ -9,14 +9,15 @@ import ( // WorkerGroupSpecApplyConfiguration represents an declarative configuration of the WorkerGroupSpec type for use // with apply. 
type WorkerGroupSpecApplyConfiguration struct { - GroupName *string `json:"groupName,omitempty"` - Replicas *int32 `json:"replicas,omitempty"` - MinReplicas *int32 `json:"minReplicas,omitempty"` - MaxReplicas *int32 `json:"maxReplicas,omitempty"` - RayStartParams map[string]string `json:"rayStartParams,omitempty"` - Template *v1.PodTemplateSpecApplyConfiguration `json:"template,omitempty"` - ScaleStrategy *ScaleStrategyApplyConfiguration `json:"scaleStrategy,omitempty"` - NumOfHosts *int32 `json:"numOfHosts,omitempty"` + GroupName *string `json:"groupName,omitempty"` + Replicas *int32 `json:"replicas,omitempty"` + MinReplicas *int32 `json:"minReplicas,omitempty"` + MaxReplicas *int32 `json:"maxReplicas,omitempty"` + IdleTimeoutSeconds *int32 `json:"idleTimeoutSeconds,omitempty"` + RayStartParams map[string]string `json:"rayStartParams,omitempty"` + Template *v1.PodTemplateSpecApplyConfiguration `json:"template,omitempty"` + ScaleStrategy *ScaleStrategyApplyConfiguration `json:"scaleStrategy,omitempty"` + NumOfHosts *int32 `json:"numOfHosts,omitempty"` } // WorkerGroupSpecApplyConfiguration constructs an declarative configuration of the WorkerGroupSpec type for use with @@ -57,6 +58,14 @@ func (b *WorkerGroupSpecApplyConfiguration) WithMaxReplicas(value int32) *Worker return b } +// WithIdleTimeoutSeconds sets the IdleTimeoutSeconds field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the IdleTimeoutSeconds field is set to the value of the last call. +func (b *WorkerGroupSpecApplyConfiguration) WithIdleTimeoutSeconds(value int32) *WorkerGroupSpecApplyConfiguration { + b.IdleTimeoutSeconds = &value + return b +} + // WithRayStartParams puts the entries into the RayStartParams field in the declarative configuration // and returns the receiver, so that objects can be build by chaining "With" function invocations. 
// If called multiple times, the entries provided by each call will be put on the RayStartParams field,