kubernetes · k8s-ci-robot · Jan 30, 2023 · Oct 19, 2022 · Oct 19, 2022 · Oct 31, 2022
diff --git a/vertical-pod-autoscaler/README.md b/vertical-pod-autoscaler/README.md
@@ -296,6 +296,20 @@ Please note the usage of the following arguments to override default names and p
 
 You can then choose which recommender to use by setting `recommenders` inside the `VerticalPodAutoscaler` spec.
 
+### Override default values when OOM occurs
+The parameters below can be used to override the default values applied when an OOM event occurs. When VPA observes an OOM, it records a memory sample equal to the higher of:
+* `oom-bump-up-ratio`: the memory usage observed at the time of the OOM multiplied by this ratio.
+* `oom-min-bump-up-bytes`: the memory usage observed at the time of the OOM plus this minimal increase, in bytes.
+
+Usage in the recommender deployment:
+```
+  containers:
+  - name: recommender
+    args:
+      - --oom-bump-up-ratio=2.0
+      - --oom-min-bump-up-bytes=524288000
+```
+
 # Known limitations
 
 * Updating running pods is an experimental feature of VPA. Whenever VPA updates

diff --git a/vertical-pod-autoscaler/pkg/recommender/main.go b/vertical-pod-autoscaler/pkg/recommender/main.go
@@ -18,9 +18,10 @@ package main
 
 import (
 	"flag"
-	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/input"
 	"time"
 
+	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/input"
+
 	apiv1 "k8s.io/api/core/v1"
 	"k8s.io/autoscaler/vertical-pod-autoscaler/common"
 	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/input/history"
@@ -65,6 +66,8 @@ var (
 	memoryAggregationIntervalCount = flag.Int64("memory-aggregation-interval-count", model.DefaultMemoryAggregationIntervalCount, `The number of consecutive memory-aggregation-intervals which make up the MemoryAggregationWindowLength which in turn is the period for memory usage aggregation by VPA. In other words, MemoryAggregationWindowLength = memory-aggregation-interval * memory-aggregation-interval-count.`)
 	memoryHistogramDecayHalfLife   = flag.Duration("memory-histogram-decay-half-life", model.DefaultMemoryHistogramDecayHalfLife, `The amount of time it takes a historical memory usage sample to lose half of its weight. In other words, a fresh usage sample is twice as 'important' as one with age equal to the half life period.`)
 	cpuHistogramDecayHalfLife      = flag.Duration("cpu-histogram-decay-half-life", model.DefaultCPUHistogramDecayHalfLife, `The amount of time it takes a historical CPU usage sample to lose half of its weight.`)
+	oomBumpUpRatio                 = flag.Float64("oom-bump-up-ratio", model.DefaultOOMBumpUpRatio, `The bump up ratio when OOM occurred, default is 1.2`)
+	oomMinBumpUp                   = flag.Float64("oom-min-bump-up-bytes", model.DefaultOOMMinBumpUp, `Specifies minimal increase of memory after observing OOM., default is 100 * 1024 * 1024`)
 )
 
 func main() {
@@ -74,7 +77,7 @@ func main() {
 
 	config := common.CreateKubeConfigOrDie(*kubeconfig, float32(*kubeApiQps), int(*kubeApiBurst))
 
-	model.InitializeAggregationsConfig(model.NewAggregationsConfig(*memoryAggregationInterval, *memoryAggregationIntervalCount, *memoryHistogramDecayHalfLife, *cpuHistogramDecayHalfLife))
+	model.InitializeAggregationsConfig(model.NewAggregationsConfig(*memoryAggregationInterval, *memoryAggregationIntervalCount, *memoryHistogramDecayHalfLife, *cpuHistogramDecayHalfLife, *oomBumpUpRatio, *oomMinBumpUp))
 
 	healthCheck := metrics.NewHealthCheck(*metricsFetcherInterval*5, true)
 	metrics.Initialize(*address, healthCheck)

diff --git a/vertical-pod-autoscaler/pkg/recommender/model/aggregations_config.go b/vertical-pod-autoscaler/pkg/recommender/model/aggregations_config.go
@@ -51,6 +51,10 @@ type AggregationsConfig struct {
 	// CPUHistogramDecayHalfLife is the amount of time it takes a historical
 	// CPU usage sample to lose half of its weight.
 	CPUHistogramDecayHalfLife time.Duration
+	// OOMBumpUpRatio specifies the factor by which memory is scaled after observing OOM.
+	OOMBumpUpRatio float64
+	// OOMMinBumpUp specifies minimal increase of memory after observing OOM.
+	OOMMinBumpUp float64
 }
 
 const (
@@ -71,6 +75,10 @@ const (
 	// DefaultCPUHistogramDecayHalfLife is the default value for CPUHistogramDecayHalfLife.
 	// CPU usage sample to lose half of its weight.
 	DefaultCPUHistogramDecayHalfLife = time.Hour * 24
+	// DefaultOOMBumpUpRatio is the default value for OOMBumpUpRatio.
+	DefaultOOMBumpUpRatio float64 = 1.2
+	// DefaultOOMMinBumpUp is the default value for OOMMinBumpUp.
+	DefaultOOMMinBumpUp float64 = 100 * 1024 * 1024 // 100MB
 )
 
 // GetMemoryAggregationWindowLength returns the total length of the memory usage history aggregated by VPA.
@@ -103,13 +111,15 @@ func (a *AggregationsConfig) memoryHistogramOptions() util.HistogramOptions {
 }
 
 // NewAggregationsConfig creates a new AggregationsConfig based on the supplied parameters and default values.
-func NewAggregationsConfig(memoryAggregationInterval time.Duration, memoryAggregationIntervalCount int64, memoryHistogramDecayHalfLife, cpuHistogramDecayHalfLife time.Duration) *AggregationsConfig {
+func NewAggregationsConfig(memoryAggregationInterval time.Duration, memoryAggregationIntervalCount int64, memoryHistogramDecayHalfLife, cpuHistogramDecayHalfLife time.Duration, oomBumpUpRatio float64, oomMinBumpUp float64) *AggregationsConfig {
 	a := &AggregationsConfig{
 		MemoryAggregationInterval:      memoryAggregationInterval,
 		MemoryAggregationIntervalCount: memoryAggregationIntervalCount,
 		HistogramBucketSizeGrowth:      DefaultHistogramBucketSizeGrowth,
 		MemoryHistogramDecayHalfLife:   memoryHistogramDecayHalfLife,
 		CPUHistogramDecayHalfLife:      cpuHistogramDecayHalfLife,
+		OOMBumpUpRatio:                 oomBumpUpRatio,
+		OOMMinBumpUp:                   oomMinBumpUp,
 	}
 	a.CPUHistogramOptions = a.cpuHistogramOptions()
 	a.MemoryHistogramOptions = a.memoryHistogramOptions()
@@ -121,7 +131,7 @@ var aggregationsConfig *AggregationsConfig
 // GetAggregationsConfig gets the aggregations config. Initializes to default values if not initialized already.
 func GetAggregationsConfig() *AggregationsConfig {
 	if aggregationsConfig == nil {
-		aggregationsConfig = NewAggregationsConfig(DefaultMemoryAggregationInterval, DefaultMemoryAggregationIntervalCount, DefaultMemoryHistogramDecayHalfLife, DefaultCPUHistogramDecayHalfLife)
+		aggregationsConfig = NewAggregationsConfig(DefaultMemoryAggregationInterval, DefaultMemoryAggregationIntervalCount, DefaultMemoryHistogramDecayHalfLife, DefaultCPUHistogramDecayHalfLife, DefaultOOMBumpUpRatio, DefaultOOMMinBumpUp)
 	}
 
 	return aggregationsConfig

diff --git a/vertical-pod-autoscaler/pkg/recommender/model/container.go b/vertical-pod-autoscaler/pkg/recommender/model/container.go
@@ -199,8 +199,8 @@ func (container *ContainerState) RecordOOM(timestamp time.Time, requestedMemory
 	// Get max of the request and the recent usage-based memory peak.
 	// Omitting oomPeak here to protect against recommendation running too high on subsequent OOMs.
 	memoryUsed := ResourceAmountMax(requestedMemory, container.memoryPeak)
-	memoryNeeded := ResourceAmountMax(memoryUsed+MemoryAmountFromBytes(OOMMinBumpUp),
-		ScaleResource(memoryUsed, OOMBumpUpRatio))
+	memoryNeeded := ResourceAmountMax(memoryUsed+MemoryAmountFromBytes(GetAggregationsConfig().OOMMinBumpUp),
+		ScaleResource(memoryUsed, GetAggregationsConfig().OOMBumpUpRatio))
 
 	oomMemorySample := ContainerUsageSample{
 		MeasureStart: timestamp,