From e04723a56fe84785145e1e88eb3befcacaf3b86f Mon Sep 17 00:00:00 2001 From: crazycs Date: Tue, 2 Jul 2019 11:44:14 +0800 Subject: [PATCH] ddl: add recover for run ddl job (#10981) (#11022) --- ddl/ddl_worker.go | 14 +++++++++++++- ddl/failtest/fail_db_test.go | 14 ++++++++++++++ ddl/table.go | 2 +- infoschema/infoschema.go | 5 +++++ 4 files changed, 33 insertions(+), 2 deletions(-) diff --git a/ddl/ddl_worker.go b/ddl/ddl_worker.go index 2151d9efebb78..6ec289f3ebd82 100644 --- a/ddl/ddl_worker.go +++ b/ddl/ddl_worker.go @@ -21,6 +21,7 @@ import ( "time" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/parser/model" "github.com/pingcap/parser/terror" "github.com/pingcap/tidb/ddl/util" @@ -30,6 +31,7 @@ import ( "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/sessionctx/binloginfo" "github.com/pingcap/tidb/sessionctx/variable" + tidbutil "github.com/pingcap/tidb/util" "github.com/pingcap/tidb/util/admin" "github.com/pingcap/tidb/util/logutil" "go.uber.org/zap" @@ -397,7 +399,14 @@ func (w *worker) handleDDLJobQueue(d *ddlCtx) error { // If running job meets error, we will save this error in job Error // and retry later if the job is not cancelled. - schemaVer, runJobErr = w.runDDLJob(d, t, job) + tidbutil.WithRecovery(func() { + schemaVer, runJobErr = w.runDDLJob(d, t, job) + }, func(r interface{}) { + if r != nil { + // If run ddl job panic, just cancel the ddl jobs. + job.State = model.JobStateCancelling + } + }) if job.IsCancelled() { txn.Reset() err = w.finishDDLJob(t, job) @@ -469,6 +478,9 @@ func chooseLeaseTime(t, max time.Duration) time.Duration { // runDDLJob runs a DDL job. It returns the current schema version in this transaction and the error. func (w *worker) runDDLJob(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64, err error) { + // Mock for run ddl job panic. + failpoint.Inject("mockPanicInRunDDLJob", func(_ failpoint.Value) {}) + logutil.Logger(w.logCtx).Info("[ddl] run DDL job", zap.String("job", job.String())) timeStart := time.Now() defer func() { diff --git a/ddl/failtest/fail_db_test.go b/ddl/failtest/fail_db_test.go index a8ad59c218ed0..8488cda6cddf7 100644 --- a/ddl/failtest/fail_db_test.go +++ b/ddl/failtest/fail_db_test.go @@ -386,3 +386,17 @@ LOOP: tk.MustExec("admin check table test_add_index") tk.MustExec("drop table test_add_index") } + +// TestRunDDLJobPanic tests recover panic when run ddl job panic. +func (s *testFailDBSuite) TestRunDDLJobPanic(c *C) { + defer func() { + c.Assert(failpoint.Disable("github.com/pingcap/tidb/ddl/mockPanicInRunDDLJob"), IsNil) + }() + tk := testkit.NewTestKit(c, s.store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + c.Assert(failpoint.Enable("github.com/pingcap/tidb/ddl/mockPanicInRunDDLJob", `1*panic("panic test")`), IsNil) + _, err := tk.Exec("create table t(c1 int, c2 int)") + c.Assert(err, NotNil) + c.Assert(err.Error(), Equals, "[ddl:12]cancelled DDL job") +} diff --git a/ddl/table.go b/ddl/table.go index a466f40e2f2af..3232fa3ab8ae2 100644 --- a/ddl/table.go +++ b/ddl/table.go @@ -765,7 +765,7 @@ func onModifyTableCharsetAndCollate(t *meta.Meta, job *model.Job) (ver int64, _ func checkTableNotExists(d *ddlCtx, t *meta.Meta, schemaID int64, tableName string) error { // d.infoHandle maybe nil in some test. - if d.infoHandle == nil { + if d.infoHandle == nil || !d.infoHandle.IsValid() { return checkTableNotExistsFromStore(t, schemaID, tableName) } // Try to use memory schema info to check first. diff --git a/infoschema/infoschema.go b/infoschema/infoschema.go index ed455b3dacfcb..7b0b5f612cae0 100644 --- a/infoschema/infoschema.go +++ b/infoschema/infoschema.go @@ -292,6 +292,11 @@ func (h *Handle) Get() InfoSchema { return schema } +// IsValid uses to check whether handle value is valid. +func (h *Handle) IsValid() bool { + return h.value.Load() != nil +} + // EmptyClone creates a new Handle with the same store and memSchema, but the value is not set. func (h *Handle) EmptyClone() *Handle { newHandle := &Handle{