Skip to content

Commit

Permalink
Merge pull request #5275 from navinjoy/oom-params
Browse files Browse the repository at this point in the history
VPA: make parameters oomBumpUpRatio and oomMinBumpUp configurable
  • Loading branch information
k8s-ci-robot authored Jan 30, 2023
2 parents 4d94120 + c36f6ca commit 65c098b
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 7 deletions.
18 changes: 17 additions & 1 deletion vertical-pod-autoscaler/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,22 @@ Please note the usage of the following arguments to override default names and p
You can then choose which recommender to use by setting `recommenders` inside the `VerticalPodAutoscaler` spec.
### Custom memory bump-up after OOMKill
After an OOMKill event is observed, VPA increases the memory recommendation based on the memory usage observed in the event, according to this formula: `recommendation = max(memory-usage-in-oomkill-event + oom-min-bump-up-bytes, memory-usage-in-oomkill-event * oom-bump-up-ratio)`.
You can configure the minimum bump-up as well as the multiplier by specifying startup arguments for the recommender:
`oom-bump-up-ratio` specifies the memory bump-up ratio applied when an OOM occurs; the default is `1.2`. This means memory will be increased by 20% after an OOMKill event, unless the minimum bump-up results in a larger increase.
`oom-min-bump-up-bytes` specifies the minimal increase of memory, in bytes, after observing an OOM. Defaults to `100 * 1024 * 1024` (= 100MiB).
Usage in recommender deployment
```
containers:
- name: recommender
args:
- --oom-bump-up-ratio=2.0
- --oom-min-bump-up-bytes=524288000
```
### Using CPU management with static policy
If you are using the [CPU management with static policy](https://kubernetes.io/docs/tasks/administer-cluster/cpu-management-policies/#static-policy) for some containers,
Expand All @@ -314,7 +330,7 @@ The annotation format is the following:
```
vpa-post-processor.kubernetes.io/{containerName}_integerCPU=true
```
# Known limitations
* Whenever VPA updates the pod resources, the pod is recreated, which causes all
Expand Down
7 changes: 5 additions & 2 deletions vertical-pod-autoscaler/pkg/recommender/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@ package main

import (
"flag"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/input"
"time"

"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/input"

apiv1 "k8s.io/api/core/v1"
"k8s.io/autoscaler/vertical-pod-autoscaler/common"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/input/history"
Expand Down Expand Up @@ -65,6 +66,8 @@ var (
memoryAggregationIntervalCount = flag.Int64("memory-aggregation-interval-count", model.DefaultMemoryAggregationIntervalCount, `The number of consecutive memory-aggregation-intervals which make up the MemoryAggregationWindowLength which in turn is the period for memory usage aggregation by VPA. In other words, MemoryAggregationWindowLength = memory-aggregation-interval * memory-aggregation-interval-count.`)
memoryHistogramDecayHalfLife = flag.Duration("memory-histogram-decay-half-life", model.DefaultMemoryHistogramDecayHalfLife, `The amount of time it takes a historical memory usage sample to lose half of its weight. In other words, a fresh usage sample is twice as 'important' as one with age equal to the half life period.`)
cpuHistogramDecayHalfLife = flag.Duration("cpu-histogram-decay-half-life", model.DefaultCPUHistogramDecayHalfLife, `The amount of time it takes a historical CPU usage sample to lose half of its weight.`)
oomBumpUpRatio = flag.Float64("oom-bump-up-ratio", model.DefaultOOMBumpUpRatio, `The memory bump up ratio when OOM occurred, default is 1.2.`)
oomMinBumpUp = flag.Float64("oom-min-bump-up-bytes", model.DefaultOOMMinBumpUp, `The minimal increase of memory when OOM occurred in bytes, default is 100 * 1024 * 1024`)
)

// Post processors flags
Expand All @@ -80,7 +83,7 @@ func main() {

config := common.CreateKubeConfigOrDie(*kubeconfig, float32(*kubeApiQps), int(*kubeApiBurst))

model.InitializeAggregationsConfig(model.NewAggregationsConfig(*memoryAggregationInterval, *memoryAggregationIntervalCount, *memoryHistogramDecayHalfLife, *cpuHistogramDecayHalfLife))
model.InitializeAggregationsConfig(model.NewAggregationsConfig(*memoryAggregationInterval, *memoryAggregationIntervalCount, *memoryHistogramDecayHalfLife, *cpuHistogramDecayHalfLife, *oomBumpUpRatio, *oomMinBumpUp))

healthCheck := metrics.NewHealthCheck(*metricsFetcherInterval*5, true)
metrics.Initialize(*address, healthCheck)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ type AggregationsConfig struct {
// CPUHistogramDecayHalfLife is the amount of time it takes a historical
// CPU usage sample to lose half of its weight.
CPUHistogramDecayHalfLife time.Duration
// OOMBumpUpRatio specifies the memory bump up ratio when OOM occurred.
OOMBumpUpRatio float64
// OOMMinBumpUp specifies the minimal increase of memory when OOM occurred in bytes.
OOMMinBumpUp float64
}

const (
Expand All @@ -71,6 +75,10 @@ const (
// DefaultCPUHistogramDecayHalfLife is the default value for CPUHistogramDecayHalfLife.
// It is the amount of time it takes a historical CPU usage sample to lose half of its weight.
DefaultCPUHistogramDecayHalfLife = time.Hour * 24
// DefaultOOMBumpUpRatio is the default value for OOMBumpUpRatio.
DefaultOOMBumpUpRatio float64 = 1.2 // Memory is increased by 20% after an OOMKill.
// DefaultOOMMinBumpUp is the default value for OOMMinBumpUp.
DefaultOOMMinBumpUp float64 = 100 * 1024 * 1024 // Memory is increased by at least 100MB after an OOMKill.
)

// GetMemoryAggregationWindowLength returns the total length of the memory usage history aggregated by VPA.
Expand Down Expand Up @@ -103,13 +111,15 @@ func (a *AggregationsConfig) memoryHistogramOptions() util.HistogramOptions {
}

// NewAggregationsConfig creates a new AggregationsConfig based on the supplied parameters and default values.
func NewAggregationsConfig(memoryAggregationInterval time.Duration, memoryAggregationIntervalCount int64, memoryHistogramDecayHalfLife, cpuHistogramDecayHalfLife time.Duration) *AggregationsConfig {
func NewAggregationsConfig(memoryAggregationInterval time.Duration, memoryAggregationIntervalCount int64, memoryHistogramDecayHalfLife, cpuHistogramDecayHalfLife time.Duration, oomBumpUpRatio float64, oomMinBumpUp float64) *AggregationsConfig {
a := &AggregationsConfig{
MemoryAggregationInterval: memoryAggregationInterval,
MemoryAggregationIntervalCount: memoryAggregationIntervalCount,
HistogramBucketSizeGrowth: DefaultHistogramBucketSizeGrowth,
MemoryHistogramDecayHalfLife: memoryHistogramDecayHalfLife,
CPUHistogramDecayHalfLife: cpuHistogramDecayHalfLife,
OOMBumpUpRatio: oomBumpUpRatio,
OOMMinBumpUp: oomMinBumpUp,
}
a.CPUHistogramOptions = a.cpuHistogramOptions()
a.MemoryHistogramOptions = a.memoryHistogramOptions()
Expand All @@ -121,7 +131,7 @@ var aggregationsConfig *AggregationsConfig
// GetAggregationsConfig gets the aggregations config. Initializes to default values if not initialized already.
func GetAggregationsConfig() *AggregationsConfig {
if aggregationsConfig == nil {
aggregationsConfig = NewAggregationsConfig(DefaultMemoryAggregationInterval, DefaultMemoryAggregationIntervalCount, DefaultMemoryHistogramDecayHalfLife, DefaultCPUHistogramDecayHalfLife)
aggregationsConfig = NewAggregationsConfig(DefaultMemoryAggregationInterval, DefaultMemoryAggregationIntervalCount, DefaultMemoryHistogramDecayHalfLife, DefaultCPUHistogramDecayHalfLife, DefaultOOMBumpUpRatio, DefaultOOMMinBumpUp)
}

return aggregationsConfig
Expand Down
4 changes: 2 additions & 2 deletions vertical-pod-autoscaler/pkg/recommender/model/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,8 @@ func (container *ContainerState) RecordOOM(timestamp time.Time, requestedMemory
// Get max of the request and the recent usage-based memory peak.
// Omitting oomPeak here to protect against recommendation running too high on subsequent OOMs.
memoryUsed := ResourceAmountMax(requestedMemory, container.memoryPeak)
memoryNeeded := ResourceAmountMax(memoryUsed+MemoryAmountFromBytes(OOMMinBumpUp),
ScaleResource(memoryUsed, OOMBumpUpRatio))
memoryNeeded := ResourceAmountMax(memoryUsed+MemoryAmountFromBytes(GetAggregationsConfig().OOMMinBumpUp),
ScaleResource(memoryUsed, GetAggregationsConfig().OOMBumpUpRatio))

oomMemorySample := ContainerUsageSample{
MeasureStart: timestamp,
Expand Down

0 comments on commit 65c098b

Please sign in to comment.