From 05207d6819fb14095aecd4017099da9d1b3c0c1f Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Mon, 8 Jul 2024 15:51:02 +0800 Subject: [PATCH] controller: fix the low_ru request missed (#8368) (#8371) close tikv/pd#8349 controller: fix the low_ru request missed The problem is that `c.run.currentRequests` is shared by all groups. If one group triggers a token request that isn't handled by the response, the other group's requests will be discarded. Here, we do not discard the low_ru triggers. Signed-off-by: ti-chi-bot Signed-off-by: nolouch Co-authored-by: ShuNing Co-authored-by: nolouch --- client/go.mod | 1 + client/go.sum | 1 + .../resource_group/controller/controller.go | 12 +- .../controller/controller_test.go | 139 ++++++++++++++++++ .../resource_group/controller/limiter_test.go | 15 +- 5 files changed, 164 insertions(+), 4 deletions(-) diff --git a/client/go.mod b/client/go.mod index dcbbd02ad9b..62ffc7e7a04 100644 --- a/client/go.mod +++ b/client/go.mod @@ -31,6 +31,7 @@ require ( github.com/prometheus/client_model v0.2.0 // indirect github.com/prometheus/common v0.26.0 // indirect github.com/prometheus/procfs v0.6.0 // indirect + github.com/stretchr/objx v0.5.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/net v0.17.0 // indirect golang.org/x/sys v0.13.0 // indirect diff --git a/client/go.sum b/client/go.sum index 8b35ed71bd8..d945175c38e 100644 --- a/client/go.sum +++ b/client/go.sum @@ -118,6 +118,7 @@ github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrf github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= diff --git a/client/resource_group/controller/controller.go b/client/resource_group/controller/controller.go index 06e9bb780ef..84629090e04 100755 --- a/client/resource_group/controller/controller.go +++ b/client/resource_group/controller/controller.go @@ -318,9 +318,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) { case notifyMsg := <-c.lowTokenNotifyChan: c.executeOnAllGroups((*groupCostController).updateRunState) c.executeOnAllGroups((*groupCostController).updateAvgRequestResourcePerSec) - if len(c.run.currentRequests) == 0 { - c.collectTokenBucketRequests(c.loopCtx, FromLowRU, lowToken /* select low tokens resource group */, notifyMsg) - } + c.collectTokenBucketRequests(c.loopCtx, FromLowRU, lowToken /* select low tokens resource group */, notifyMsg) if c.run.inDegradedMode { c.executeOnAllGroups((*groupCostController).applyDegradedMode) } @@ -1136,11 +1134,19 @@ func (gc *groupCostController) collectRequestAndConsumption(selectTyp selectType switch selectTyp { case periodicReport: selected = selected || gc.shouldReportConsumption() + failpoint.Inject("triggerPeriodicReport", func(val failpoint.Value) { + selected = gc.name == val.(string) + }) fallthrough case lowToken: if counter.limiter.IsLowTokens() { selected = true } + failpoint.Inject("triggerLowRUReport", func(val failpoint.Value) { + if selectTyp == lowToken { + selected = gc.name == val.(string) + } + }) } request := &rmpb.RequestUnitItem{ Type: typ, diff --git a/client/resource_group/controller/controller_test.go b/client/resource_group/controller/controller_test.go index fbd3ab0548f..cf209afed27 100644 --- a/client/resource_group/controller/controller_test.go +++ b/client/resource_group/controller/controller_test.go @@ -24,8 +24,12 @@ import ( "testing" "time" + "github.com/pingcap/failpoint" + "github.com/pingcap/kvproto/pkg/meta_storagepb" rmpb "github.com/pingcap/kvproto/pkg/resource_manager" + "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + pd "github.com/tikv/pd/client" "github.com/tikv/pd/client/errs" ) @@ -127,3 +131,138 @@ func TestResourceGroupThrottledError(t *testing.T) { re.Error(err) re.True(errs.ErrClientResourceGroupThrottled.Equal(err)) } + +// MockResourceGroupProvider is a mock implementation of the ResourceGroupProvider interface. +type MockResourceGroupProvider struct { + mock.Mock +} + +func (m *MockResourceGroupProvider) GetResourceGroup(ctx context.Context, resourceGroupName string) (*rmpb.ResourceGroup, error) { + args := m.Called(ctx, resourceGroupName) + return args.Get(0).(*rmpb.ResourceGroup), args.Error(1) +} + +func (m *MockResourceGroupProvider) ListResourceGroups(ctx context.Context) ([]*rmpb.ResourceGroup, error) { + args := m.Called(ctx) + return args.Get(0).([]*rmpb.ResourceGroup), args.Error(1) +} + +func (m *MockResourceGroupProvider) AddResourceGroup(ctx context.Context, metaGroup *rmpb.ResourceGroup) (string, error) { + args := m.Called(ctx, metaGroup) + return args.String(0), args.Error(1) +} + +func (m *MockResourceGroupProvider) ModifyResourceGroup(ctx context.Context, metaGroup *rmpb.ResourceGroup) (string, error) { + args := m.Called(ctx, metaGroup) + return args.String(0), args.Error(1) +} + +func (m *MockResourceGroupProvider) DeleteResourceGroup(ctx context.Context, resourceGroupName string) (string, error) { + args := m.Called(ctx, resourceGroupName) + return args.String(0), args.Error(1) +} + +func (m *MockResourceGroupProvider) AcquireTokenBuckets(ctx context.Context, request *rmpb.TokenBucketsRequest) ([]*rmpb.TokenBucketResponse, error) { + args := m.Called(ctx, request) + return args.Get(0).([]*rmpb.TokenBucketResponse), args.Error(1) +} + +func (m *MockResourceGroupProvider) LoadResourceGroups(ctx context.Context) ([]*rmpb.ResourceGroup, int64, error) { + args := m.Called(ctx) + return args.Get(0).([]*rmpb.ResourceGroup), args.Get(1).(int64), args.Error(2) +} + +func (m *MockResourceGroupProvider) Watch(ctx context.Context, key []byte, opts ...pd.OpOption) (chan []*meta_storagepb.Event, error) { + args := m.Called(ctx, key, opts) + return args.Get(0).(chan []*meta_storagepb.Event), args.Error(1) +} + +func (m *MockResourceGroupProvider) Get(ctx context.Context, key []byte, opts ...pd.OpOption) (*meta_storagepb.GetResponse, error) { + args := m.Called(ctx, key, opts) + return args.Get(0).(*meta_storagepb.GetResponse), args.Error(1) +} + +func TestControllerWithTwoGroupRequestConcurrency(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + mockProvider := new(MockResourceGroupProvider) + + mockProvider.On("Get", mock.Anything, mock.Anything, mock.Anything).Return(&meta_storagepb.GetResponse{}, nil) + // LoadResourceGroups + mockProvider.On("LoadResourceGroups", mock.Anything).Return([]*rmpb.ResourceGroup{}, int64(0), nil) + // Watch + mockProvider.On("Watch", mock.Anything, mock.Anything, mock.Anything).Return(make(chan []*meta_storagepb.Event), nil) + + re.NoError(failpoint.Enable("github.com/tikv/pd/client/resource_group/controller/triggerPeriodicReport", fmt.Sprintf("return(\"%s\")", "default"))) + defer failpoint.Disable("github.com/tikv/pd/client/resource_group/controller/triggerPeriodicReport") + re.NoError(failpoint.Enable("github.com/tikv/pd/client/resource_group/controller/triggerLowRUReport", fmt.Sprintf("return(\"%s\")", "test-group"))) + defer failpoint.Disable("github.com/tikv/pd/client/resource_group/controller/triggerLowRUReport") + + controller, _ := NewResourceGroupController(ctx, 1, mockProvider, nil) + controller.Start(ctx) + + defaultResourceGroup := &rmpb.ResourceGroup{Name: "default", Mode: rmpb.GroupMode_RUMode, RUSettings: &rmpb.GroupRequestUnitSettings{RU: &rmpb.TokenBucket{Settings: &rmpb.TokenLimitSettings{FillRate: 1000000}}}} + testResourceGroup := &rmpb.ResourceGroup{Name: "test-group", Mode: rmpb.GroupMode_RUMode, RUSettings: &rmpb.GroupRequestUnitSettings{RU: &rmpb.TokenBucket{Settings: &rmpb.TokenLimitSettings{FillRate: 1000000}}}} + mockProvider.On("GetResourceGroup", mock.Anything, "default", mock.Anything).Return(defaultResourceGroup, nil) + mockProvider.On("GetResourceGroup", mock.Anything, "test-group", mock.Anything).Return(testResourceGroup, nil) + + c1, err := controller.tryGetResourceGroup(ctx, "default") + re.NoError(err) + re.Equal(defaultResourceGroup, c1.meta) + + c2, err := controller.tryGetResourceGroup(ctx, "test-group") + re.NoError(err) + re.Equal(testResourceGroup, c2.meta) + + var expectResp []*rmpb.TokenBucketResponse + recTestGroupAcquireTokenRequest := make(chan bool) + mockProvider.On("AcquireTokenBuckets", mock.Anything, mock.Anything).Run(func(args mock.Arguments) { + request := args.Get(1).(*rmpb.TokenBucketsRequest) + var responses []*rmpb.TokenBucketResponse + for _, req := range request.Requests { + if req.ResourceGroupName == "default" { + // no response the default group request, that's mean `len(c.run.currentRequests) != 0` always. + time.Sleep(100 * time.Second) + responses = append(responses, &rmpb.TokenBucketResponse{ + ResourceGroupName: "default", + GrantedRUTokens: []*rmpb.GrantedRUTokenBucket{ + { + GrantedTokens: &rmpb.TokenBucket{ + Tokens: 100000, + }, + }, + }, + }) + } else { + responses = append(responses, &rmpb.TokenBucketResponse{ + ResourceGroupName: req.ResourceGroupName, + GrantedRUTokens: []*rmpb.GrantedRUTokenBucket{ + { + GrantedTokens: &rmpb.TokenBucket{ + Tokens: 100000, + }, + }, + }, + }) + } + } + // receive test-group request + if len(request.Requests) == 1 && request.Requests[0].ResourceGroupName == "test-group" { + recTestGroupAcquireTokenRequest <- true + } + expectResp = responses + }).Return(expectResp, nil) + // wait default group request token by PeriodicReport. + time.Sleep(2 * time.Second) + counter := c2.run.requestUnitTokens[0] + counter.limiter.mu.Lock() + counter.limiter.notify() + counter.limiter.mu.Unlock() + select { + case res := <-recTestGroupAcquireTokenRequest: + re.True(res) + case <-time.After(5 * time.Second): + re.Fail("timeout") + } +} diff --git a/client/resource_group/controller/limiter_test.go b/client/resource_group/controller/limiter_test.go index ba4771810d8..8854d4b2803 100644 --- a/client/resource_group/controller/limiter_test.go +++ b/client/resource_group/controller/limiter_test.go @@ -44,6 +44,18 @@ var ( t8 = t0.Add(time.Duration(8) * d) ) +func resetTime() { + t0 = time.Now() + t1 = t0.Add(time.Duration(1) * d) + t2 = t0.Add(time.Duration(2) * d) + t3 = t0.Add(time.Duration(3) * d) + t4 = t0.Add(time.Duration(4) * d) + t5 = t0.Add(time.Duration(5) * d) + t6 = t0.Add(time.Duration(6) * d) + t7 = t0.Add(time.Duration(7) * d) + t8 = t0.Add(time.Duration(8) * d) +} + type request struct { t time.Time n float64 @@ -136,6 +148,7 @@ func TestNotify(t *testing.T) { } func TestCancel(t *testing.T) { + resetTime() ctx := context.Background() ctx1, cancel1 := context.WithDeadline(ctx, t2) re := require.New(t) @@ -153,8 +166,8 @@ func TestCancel(t *testing.T) { checkTokens(re, lim1, t2, 7) checkTokens(re, lim2, t2, 2) d, err := WaitReservations(ctx, t2, []*Reservation{r1, r2}) - re.Equal(d, 4*time.Second) re.Error(err) + re.Equal(4*time.Second, d) checkTokens(re, lim1, t3, 13) checkTokens(re, lim2, t3, 3) cancel1()