From d0f0b2958569178a049230e7ef485adf3ce76e96 Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Tue, 15 Jan 2019 20:22:58 +0800 Subject: [PATCH] domain: fix stats worker cannot recover from panic (#9072) --- domain/domain.go | 6 +++--- domain/domain_test.go | 12 ++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/domain/domain.go b/domain/domain.go index b22693e411c4e..5ede445ca0452 100644 --- a/domain/domain.go +++ b/domain/domain.go @@ -841,6 +841,7 @@ func (do *Domain) newStatsOwner() owner.Manager { } func (do *Domain) updateStatsWorker(ctx sessionctx.Context, owner owner.Manager) { + defer recoverInDomain("updateStatsWorker", false) lease := do.statsLease deltaUpdateDuration := lease * 20 loadTicker := time.NewTicker(lease) @@ -865,7 +866,6 @@ func (do *Domain) updateStatsWorker(ctx sessionctx.Context, owner owner.Manager) } defer func() { do.SetStatsUpdating(false) - recoverInDomain("updateStatsWorker", false) do.wg.Done() }() for { @@ -922,11 +922,11 @@ func (do *Domain) updateStatsWorker(ctx sessionctx.Context, owner owner.Manager) } func (do *Domain) autoAnalyzeWorker(owner owner.Manager) { + defer recoverInDomain("autoAnalyzeWorker", false) statsHandle := do.StatsHandle() analyzeTicker := time.NewTicker(do.statsLease) - defer analyzeTicker.Stop() defer func() { - recoverInDomain("autoAnalyzeWorker", false) + analyzeTicker.Stop() do.wg.Done() }() for { diff --git a/domain/domain_test.go b/domain/domain_test.go index be1f18c8aec17..8156419cb5106 100644 --- a/domain/domain_test.go +++ b/domain/domain_test.go @@ -22,9 +22,11 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/parser/ast" "github.com/pingcap/parser/model" + "github.com/pingcap/tidb/metrics" "github.com/pingcap/tidb/store/mockstore" "github.com/pingcap/tidb/util/mock" "github.com/pingcap/tidb/util/testleak" + dto "github.com/prometheus/client_model/go" ) func TestT(t *testing.T) { @@ -127,6 +129,16 @@ func (*testSuite) TestT(c *C) { c.Assert(*res[0], Equals, SlowQueryInfo{SQL: "ccc", Duration: 2 * time.Second}) c.Assert(*res[1], Equals, SlowQueryInfo{SQL: "bbb", Duration: 3 * time.Second}) + metrics.PanicCounter.Reset() + // Since the stats lease is 0 now, so create a new ticker will panic. + // Test that they can recover from panic correctly. + dom.updateStatsWorker(ctx, nil) + dom.autoAnalyzeWorker(nil) + counter := metrics.PanicCounter.WithLabelValues(metrics.LabelDomain) + pb := &dto.Metric{} + counter.Write(pb) + c.Assert(pb.GetCounter().GetValue(), Equals, float64(2)) + err = store.Close() c.Assert(err, IsNil) }