Skip to content

Commit

Permalink
Add HPA as one option
Browse files Browse the repository at this point in the history
Signed-off-by: kerthcet <kerthcet@gmail.com>
  • Loading branch information
kerthcet committed Jan 22, 2025
1 parent 020c14e commit 87fe5e4
Show file tree
Hide file tree
Showing 9 changed files with 1,227 additions and 1,139 deletions.
20 changes: 13 additions & 7 deletions api/inference/v1alpha1/backendruntime_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ type BackendRuntimeArg struct {
Flags []string `json:"flags,omitempty"`
}

// ScalingPolicy defines the HPA policies for scaling the workloads.
// HPA should be installed in prior.
// HPAConfig represents the configuration of the HorizontalPodAutoscaler.
// Inspired by kubernetes.io/pkg/apis/autoscaling/types.go#HorizontalPodAutoscalerSpec.
type ScalingPolicy struct {
// Note: the HPA component must be installed beforehand.
type HPAConfig struct {
// metrics contains the specifications for which to use to calculate the
// desired replica count (the maximum replica count across all metrics will
// be used). The desired replica count is calculated multiplying the
Expand All @@ -54,6 +54,13 @@ type ScalingPolicy struct {
Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
}

// ScalePolicy defines the policy for scaling the workloads.
// Only HPA (HorizontalPodAutoscaler) is supported for now.
type ScalePolicy struct {
// HPA represents the configuration of the HorizontalPodAutoscaler.
// It is a pointer tagged omitempty, so a nil value is left out of the
// serialized JSON (key "hpa").
HPA *HPAConfig `json:"hpa,omitempty"`
}

// MultiHostCommands represents leader & worker commands for multiple nodes scenarios.
type MultiHostCommands struct {
Leader []string `json:"leader,omitempty"`
Expand Down Expand Up @@ -101,11 +108,10 @@ type BackendRuntimeSpec struct {
// when it might take a long time to load data or warm a cache, than during steady-state operation.
// +optional
StartupProbe *corev1.Probe `json:"startupProbe,omitempty"`
// ScalingPolicy represents the rules for scaling the backend based on the metrics,
// using HPA as the underlying horizontal scaler.
// If playground doesn't define the scalingPolicy, the default policy here will be used.
// ScalePolicy represents the rules for scaling the backend based on the metrics.
// If playground doesn't define the ScalePolicy, the default policy here will be used.
// +optional
ScalingPolicy *ScalingPolicy `json:"scalingPolicy,omitempty"`
ScalePolicy *ScalePolicy `json:"scalePolicy,omitempty"`
}

// BackendRuntimeStatus defines the observed state of BackendRuntime
Expand Down
8 changes: 4 additions & 4 deletions api/inference/v1alpha1/config_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,9 @@ type ElasticConfig struct {
// Default to nil means there's no limit for the instance number.
// +optional
MaxReplicas *int32 `json:"maxReplicas,omitempty"`
// ScalingPolicy defines the HPA policies for scaling the workloads.
// If not defined, the default policy configured in backendRuntime will be used,
// otherwise, the policy here will overwrite the default policy.
// ScalePolicy defines the rules for scaling the workloads.
// If not defined, the policy configured in backendRuntime will be used;
// otherwise, the policy defined here will override the default one.
// +optional
ScalingPolicy *ScalingPolicy `json:"scalingPolicy,omitempty"`
ScalePolicy *ScalePolicy `json:"scalePolicy,omitempty"`
}
60 changes: 40 additions & 20 deletions api/inference/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 7 additions & 7 deletions client-go/applyconfiguration/inference/v1alpha1/elasticconfig.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions client-go/applyconfiguration/inference/v1alpha1/scalepolicy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions client-go/applyconfiguration/utils.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 87fe5e4

Please sign in to comment.