From b9635d21a5ddbc74c3ba6f852065e6103b73db29 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Fri, 8 Dec 2023 12:02:51 +0800 Subject: [PATCH] pkg/util: refine tidb_server_memory_limit to make the cpu usage more stable (#48927) (#49059) close pingcap/tidb#48741 --- util/gctuner/memory_limit_tuner.go | 32 ++++++-- util/gctuner/memory_limit_tuner_test.go | 104 ++++++++++++++++++++++-- 2 files changed, 124 insertions(+), 12 deletions(-) diff --git a/util/gctuner/memory_limit_tuner.go b/util/gctuner/memory_limit_tuner.go index 1679e012579d0..6c7f9725abb1f 100644 --- a/util/gctuner/memory_limit_tuner.go +++ b/util/gctuner/memory_limit_tuner.go @@ -32,9 +32,11 @@ var GlobalMemoryLimitTuner = &memoryLimitTuner{} // So we can change memory limit dynamically to avoid frequent GC when memory usage is greater than the limit. type memoryLimitTuner struct { finalizer *finalizer - isTuning atomicutil.Bool + isValidValueSet atomicutil.Bool percentage atomicutil.Float64 - waitingReset atomicutil.Bool + adjustPercentageInProgress atomicutil.Bool + serverMemLimitBeforeAdjust atomicutil.Uint64 + percentageBeforeAdjust atomicutil.Float64 nextGCTriggeredByMemoryLimit atomicutil.Bool } @@ -44,7 +46,7 @@ const fallbackPercentage float64 = 1.1 // tuning check the memory nextGC and judge whether this GC is trigger by memory limit. // Go runtime ensure that it will be called serially. func (t *memoryLimitTuner) tuning() { - if !t.isTuning.Load() { + if !t.isValidValueSet.Load() { return } r := memory.ForceReadMemStats() @@ -60,12 +62,23 @@ func (t *memoryLimitTuner) tuning() { // - Only if NextGC >= MemoryLimit , the **next** GC will be triggered by MemoryLimit. Thus, we need to reset // MemoryLimit after the **next** GC happens if needed. if float64(r.HeapInuse)*ratio > float64(debug.SetMemoryLimit(-1)) { - if t.nextGCTriggeredByMemoryLimit.Load() && t.waitingReset.CompareAndSwap(false, true) { + if t.nextGCTriggeredByMemoryLimit.Load() && t.adjustPercentageInProgress.CompareAndSwap(false, true) { + // It's ok to update `adjustPercentageInProgress`, `serverMemLimitBeforeAdjust` and `percentageBeforeAdjust` not in a transaction. + // The update of memory limit is eventually consistent. + t.serverMemLimitBeforeAdjust.Store(memory.ServerMemoryLimit.Load()) + t.percentageBeforeAdjust.Store(t.GetPercentage()) go func() { memory.MemoryLimitGCLast.Store(time.Now()) memory.MemoryLimitGCTotal.Add(1) debug.SetMemoryLimit(t.calcMemoryLimit(fallbackPercentage)) resetInterval := 1 * time.Minute // Wait 1 minute and set back, to avoid frequent GC + failpoint.Inject("mockUpdateGlobalVarDuringAdjustPercentage", func(val failpoint.Value) { + if val, ok := val.(bool); val && ok { + resetInterval = 5 * time.Second + time.Sleep(300 * time.Millisecond) + t.UpdateMemoryLimit() + } + }) failpoint.Inject("testMemoryLimitTuner", func(val failpoint.Value) { if val, ok := val.(bool); val && ok { resetInterval = 1 * time.Second @@ -73,7 +86,7 @@ func (t *memoryLimitTuner) tuning() { }) time.Sleep(resetInterval) debug.SetMemoryLimit(t.calcMemoryLimit(t.GetPercentage())) - for !t.waitingReset.CompareAndSwap(true, false) { + for !t.adjustPercentageInProgress.CompareAndSwap(true, false) { continue } }() @@ -109,12 +122,17 @@ func (t *memoryLimitTuner) GetPercentage() float64 { // UpdateMemoryLimit updates the memory limit. // This function should be called when `tidb_server_memory_limit` or `tidb_server_memory_limit_gc_trigger` is modified. func (t *memoryLimitTuner) UpdateMemoryLimit() { + if t.adjustPercentageInProgress.Load() { + if t.serverMemLimitBeforeAdjust.Load() == memory.ServerMemoryLimit.Load() && t.percentageBeforeAdjust.Load() == t.GetPercentage() { + return + } + } var memoryLimit = t.calcMemoryLimit(t.GetPercentage()) if memoryLimit == math.MaxInt64 { - t.isTuning.Store(false) + t.isValidValueSet.Store(false) memoryLimit = initGOMemoryLimitValue } else { - t.isTuning.Store(true) + t.isValidValueSet.Store(true) } debug.SetMemoryLimit(memoryLimit) } diff --git a/util/gctuner/memory_limit_tuner_test.go b/util/gctuner/memory_limit_tuner_test.go index c6f63215c01dd..842ffa533148c 100644 --- a/util/gctuner/memory_limit_tuner_test.go +++ b/util/gctuner/memory_limit_tuner_test.go @@ -57,11 +57,11 @@ func TestGlobalMemoryTuner(t *testing.T) { memory.ServerMemoryLimit.Store(1 << 30) // 1GB GlobalMemoryLimitTuner.SetPercentage(0.8) // 1GB * 80% = 800MB GlobalMemoryLimitTuner.UpdateMemoryLimit() - require.True(t, GlobalMemoryLimitTuner.isTuning.Load()) + require.True(t, GlobalMemoryLimitTuner.isValidValueSet.Load()) defer func() { time.Sleep(1 * time.Second) // If test.count > 1, wait tuning finished. - require.True(t, GlobalMemoryLimitTuner.isTuning.Load()) - require.False(t, GlobalMemoryLimitTuner.waitingReset.Load()) + require.True(t, GlobalMemoryLimitTuner.isValidValueSet.Load()) + require.False(t, GlobalMemoryLimitTuner.adjustPercentageInProgress.Load()) require.Equal(t, GlobalMemoryLimitTuner.nextGCTriggeredByMemoryLimit.Load(), false) }() @@ -76,7 +76,7 @@ func TestGlobalMemoryTuner(t *testing.T) { runtime.ReadMemStats(r) nextGC := r.NextGC memoryLimit := GlobalMemoryLimitTuner.calcMemoryLimit(GlobalMemoryLimitTuner.GetPercentage()) - // In golang source, nextGC = memoryLimit - three parts memory. + // Refer to golang source code, nextGC = memoryLimit - nonHeapMemory - overageMemory - headroom require.True(t, nextGC < uint64(memoryLimit)) } @@ -85,7 +85,7 @@ func TestGlobalMemoryTuner(t *testing.T) { memory210mb := allocator.alloc(210 << 20) time.Sleep(100 * time.Millisecond) - require.True(t, GlobalMemoryLimitTuner.waitingReset.Load()) + require.True(t, GlobalMemoryLimitTuner.adjustPercentageInProgress.Load()) require.True(t, gcNum < getNowGCNum()) // Test waiting for reset time.Sleep(500 * time.Millisecond) @@ -110,3 +110,97 @@ func TestGlobalMemoryTuner(t *testing.T) { allocator.free(memory210mb) allocator.free(memory600mb) } + +func TestIssue48741(t *testing.T) { + // Close GOGCTuner + gogcTuner := EnableGOGCTuner.Load() + EnableGOGCTuner.Store(false) + defer EnableGOGCTuner.Store(gogcTuner) + + r := &runtime.MemStats{} + getNowGCNum := func() uint32 { + runtime.ReadMemStats(r) + return r.NumGC + } + allocator := &mockAllocator{} + defer allocator.freeAll() + + checkIfMemoryLimitIsModified := func() { + memory.ServerMemoryLimit.Store(1500 << 20) // 1.5 GB + + // Try to trigger GC by 1GB * 80% = 800MB (tidb_server_memory_limit * tidb_server_memory_limit_gc_trigger) + gcNum := getNowGCNum() + memory810mb := allocator.alloc(810 << 20) + require.Eventually(t, + // Wait for the GC triggered by memory810mb + func() bool { + return GlobalMemoryLimitTuner.adjustPercentageInProgress.Load() && gcNum < getNowGCNum() + }, + 500*time.Millisecond, 100*time.Millisecond) + + gcNumAfterMemory810mb := getNowGCNum() + // After the GC triggered by memory810mb. + time.Sleep(4500 * time.Millisecond) + require.Equal(t, debug.SetMemoryLimit(-1), int64(1500<<20*80/100)) + + memory700mb := allocator.alloc(200 << 20) + time.Sleep(5 * time.Second) + // The heapInUse is less than 1.5GB * 80% = 1.2GB, so the gc will not be triggered. + require.Equal(t, gcNumAfterMemory810mb, getNowGCNum()) + + memory150mb := allocator.alloc(300 << 20) + require.Eventually(t, + // Wait for the GC triggered by memory810mb + func() bool { + return GlobalMemoryLimitTuner.adjustPercentageInProgress.Load() && gcNumAfterMemory810mb < getNowGCNum() + }, + 5*time.Second, 100*time.Millisecond) + + time.Sleep(4500 * time.Millisecond) + require.Equal(t, debug.SetMemoryLimit(-1), int64(1500<<20*110/100)) + + allocator.free(memory810mb) + allocator.free(memory700mb) + allocator.free(memory150mb) + } + + checkIfMemoryLimitNotModified := func() { + // Try to trigger GC by 1GB * 80% = 800MB (tidb_server_memory_limit * tidb_server_memory_limit_gc_trigger) + gcNum := getNowGCNum() + memory810mb := allocator.alloc(810 << 20) + require.Eventually(t, + // Wait for the GC triggered by memory810mb + func() bool { + return GlobalMemoryLimitTuner.adjustPercentageInProgress.Load() && gcNum < getNowGCNum() + }, + 500*time.Millisecond, 100*time.Millisecond) + + gcNumAfterMemory810mb := getNowGCNum() + // After the GC triggered by memory810mb. + time.Sleep(4500 * time.Millisecond) + // During the process of adjusting the percentage, the memory limit will be set to 1GB * 110% = 1.1GB. + require.Equal(t, debug.SetMemoryLimit(-1), int64(1<<30*110/100)) + + require.Eventually(t, + // The GC will be trigged immediately after memoryLimit is set back to 1GB * 80% = 800MB. + func() bool { + return GlobalMemoryLimitTuner.adjustPercentageInProgress.Load() && gcNumAfterMemory810mb < getNowGCNum() + }, + 2*time.Second, 100*time.Millisecond) + + allocator.free(memory810mb) + } + + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/util/gctuner/mockUpdateGlobalVarDuringAdjustPercentage", "return(true)")) + defer func() { + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/util/gctuner/mockUpdateGlobalVarDuringAdjustPercentage")) + }() + + memory.ServerMemoryLimit.Store(1 << 30) // 1GB + GlobalMemoryLimitTuner.SetPercentage(0.8) // 1GB * 80% = 800MB + GlobalMemoryLimitTuner.UpdateMemoryLimit() + require.Equal(t, debug.SetMemoryLimit(-1), int64(1<<30*80/100)) + + checkIfMemoryLimitNotModified() + checkIfMemoryLimitIsModified() +}