From b562204631589a69fa421c15fe6bef4e429e5a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 6 May 2024 17:44:37 +0800 Subject: [PATCH] br/pdutils: retry when encountered dns error (#53005) (#53032) close pingcap/tidb#53029 --- br/pkg/lightning/common/retry.go | 4 +++ br/pkg/lightning/common/retry_test.go | 2 +- br/pkg/pdutil/pd.go | 5 ++++ tests/realtikvtest/brietest/BUILD.bazel | 2 ++ tests/realtikvtest/brietest/pdutil_test.go | 33 ++++++++++++++++++++++ 5 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 tests/realtikvtest/brietest/pdutil_test.go diff --git a/br/pkg/lightning/common/retry.go b/br/pkg/lightning/common/retry.go index c44e2863ea874..15fb4e78cb023 100644 --- a/br/pkg/lightning/common/retry.go +++ b/br/pkg/lightning/common/retry.go @@ -108,6 +108,10 @@ func isSingleRetryableError(err error) bool { switch nerr := err.(type) { case net.Error: + var dErr *net.DNSError + if goerrors.As(nerr, &dErr) { + return true + } if nerr.Timeout() { return true } diff --git a/br/pkg/lightning/common/retry_test.go b/br/pkg/lightning/common/retry_test.go index af390e29e096b..974493736ab16 100644 --- a/br/pkg/lightning/common/retry_test.go +++ b/br/pkg/lightning/common/retry_test.go @@ -38,7 +38,7 @@ func TestIsRetryableError(t *testing.T) { require.True(t, IsRetryableError(ErrWriteTooSlow)) require.False(t, IsRetryableError(io.EOF)) require.False(t, IsRetryableError(&net.AddrError{})) - require.False(t, IsRetryableError(&net.DNSError{})) + require.True(t, IsRetryableError(&net.DNSError{})) require.True(t, IsRetryableError(&net.DNSError{IsTimeout: true})) // kv errors diff --git a/br/pkg/pdutil/pd.go b/br/pkg/pdutil/pd.go index 563acdbf899f6..9da85e72d0978 100644 --- a/br/pkg/pdutil/pd.go +++ b/br/pkg/pdutil/pd.go @@ -53,6 +53,7 @@ const ( // pd request retry time when connection fail pdRequestRetryTime = 120 + // set max-pending-peer-count to a large value to avoid scatter region 
failed. maxPendingPeerUnlimited uint64 = math.MaxInt32 ) @@ -177,6 +178,10 @@ func pdRequestWithCode( if err != nil { return 0, nil, errors.Trace(err) } + failpoint.Inject("DNSError", func() { + req.Host = "nosuchhost" + req.URL.Host = "nosuchhost" + }) resp, err = cli.Do(req) //nolint:bodyclose count++ failpoint.Inject("InjectClosed", func(v failpoint.Value) { diff --git a/tests/realtikvtest/brietest/BUILD.bazel b/tests/realtikvtest/brietest/BUILD.bazel index dcd73bbd6d14e..0346f2bb3a16a 100644 --- a/tests/realtikvtest/brietest/BUILD.bazel +++ b/tests/realtikvtest/brietest/BUILD.bazel @@ -9,10 +9,12 @@ go_test( "brie_test.go", "main_test.go", "operator_test.go", + "pdutil_test.go", ], flaky = True, race = "on", deps = [ + "//br/pkg/pdutil", "//br/pkg/task", "//br/pkg/task/operator", "//pkg/config", diff --git a/tests/realtikvtest/brietest/pdutil_test.go b/tests/realtikvtest/brietest/pdutil_test.go new file mode 100644 index 0000000000000..298de04afa6f7 --- /dev/null +++ b/tests/realtikvtest/brietest/pdutil_test.go @@ -0,0 +1,33 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package brietest
+
+import (
+	"context"
+	"testing"
+
+	"github.com/pingcap/failpoint"
+	"github.com/pingcap/tidb/br/pkg/pdutil"
+	"github.com/stretchr/testify/require"
+	pd "github.com/tikv/pd/client"
+)
+
+func TestCreateClient(t *testing.T) { // Checks that NewPdController returns no error while the DNSError failpoint is enabled.
+	require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/br/pkg/pdutil/DNSError", "119*return")) // DNSError points the PD request at host "nosuchhost" (pd.go hunk); "119*return" presumably fires on the first 119 evaluations only, one fewer than pdRequestRetryTime (120) — confirm against the failpoint term syntax.
+	require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/br/pkg/pdutil/FastRetry", "return(true)")) // FastRetry is not defined in this patch; presumably it shortens the wait between retries — TODO confirm.
+	ctl, err := pdutil.NewPdController(context.Background(), "127.0.0.1:2379", nil, pd.SecurityOption{}) // NOTE(review): assumes a PD endpoint is listening on 127.0.0.1:2379 — confirm for the realtikvtest environment.
+	require.NoError(t, err)
+	ctl.Close() // NOTE(review): neither failpoint is disabled afterwards; consider failpoint.Disable via t.Cleanup so they do not stay active for other tests in this package.
+}