Skip to content

Commit

Permalink
*: unify logs when the cluster is upgrading state (#44016) (#44029)
Browse files (browse the repository at this point in the history)
close #44017
  • Loading branch information
ti-chi-bot authored May 22, 2023
1 parent eac96e8 commit 52130f3
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 16 deletions.
4 changes: 2 additions & 2 deletions ddl/ddl_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -380,11 +380,11 @@ func (d *ddl) addBatchDDLJobs2Table(tasks []*limitJobTask) error {

if d.stateSyncer.IsUpgradingState() && !hasSysDB(job) {
if err = pauseRunningJob(sess.NewSession(se), job, model.AdminCommandBySystem); err != nil {
logutil.BgLogger().Warn("[ddl] pause user DDL by system failed", zap.Stringer("job", job), zap.Error(err))
logutil.BgLogger().Warn("[ddl-upgrading] pause user DDL by system failed", zap.Stringer("job", job), zap.Error(err))
task.cacheErr = err
continue
}
logutil.BgLogger().Info("[ddl] pause user DDL by system successful", zap.Stringer("job", job))
logutil.BgLogger().Info("[ddl-upgrading] pause user DDL by system successful", zap.Stringer("job", job))
}

jobTasks = append(jobTasks, job)
Expand Down
9 changes: 6 additions & 3 deletions ddl/job_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,9 +181,9 @@ func (d *ddl) processJobDuringUpgrade(sess *sess.Session, job *model.Job) (isRun
}

if err != nil {
errMsg := fmt.Sprintf("[DDL] unable to pause [%d], error: %s",
job.ID, zap.Error(err).String)
logutil.BgLogger().Warn(errMsg)
logutil.BgLogger().Warn("[ddl-upgrading] pause the job failed", zap.Stringer("job", job), zap.Bool("has job err", len(errs) > 0), zap.Error(err))
} else {
logutil.BgLogger().Warn("[ddl-upgrading] pause the job successfully", zap.Stringer("job", job))
}

return false, nil
Expand All @@ -193,11 +193,14 @@ func (d *ddl) processJobDuringUpgrade(sess *sess.Session, job *model.Job) (isRun
var errs []error
errs, err = ResumeJobsBySystem(sess.Session(), []int64{job.ID})
if len(errs) > 0 {
logutil.BgLogger().Warn("[ddl-upgrading] normal cluster state, resume the job failed", zap.Stringer("job", job), zap.Error(errs[0]))
return false, errs[0]
}
if err != nil {
logutil.BgLogger().Warn("[ddl-upgrading] normal cluster state, resume the job failed", zap.Stringer("job", job), zap.Error(err))
return false, err
}
logutil.BgLogger().Warn("[ddl-upgrading] normal cluster state, resume the job successfully", zap.Stringer("job", job))
}

return true, nil
Expand Down
21 changes: 10 additions & 11 deletions session/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -1145,7 +1145,7 @@ func syncUpgradeState(s Session) {
dom := domain.GetDomain(s)
err := dom.DDL().StateSyncer().UpdateGlobalState(ctx, syncer.NewStateInfo(syncer.StateUpgrading))
if err != nil {
logutil.BgLogger().Fatal("upgrade update global state failed", zap.String("state", syncer.StateUpgrading), zap.Error(err))
logutil.BgLogger().Fatal("[upgrading] update global state failed", zap.String("state", syncer.StateUpgrading), zap.Error(err))
}

retryTimes := 10
Expand All @@ -1156,9 +1156,9 @@ func syncUpgradeState(s Session) {
break
}
if i == retryTimes-1 {
logutil.BgLogger().Fatal("upgrade get owner op failed", zap.Stringer("state", op), zap.Error(err))
logutil.BgLogger().Fatal("[upgrading] get owner op failed", zap.Stringer("state", op), zap.Error(err))
}
logutil.BgLogger().Warn("upgrade get owner op failed", zap.Stringer("state", op), zap.Error(err))
logutil.BgLogger().Warn("[upgrading] get owner op failed", zap.Stringer("state", op), zap.Error(err))
time.Sleep(interval)
}

Expand All @@ -1181,32 +1181,31 @@ func syncUpgradeState(s Session) {
}

if i == retryTimes-1 {
logutil.BgLogger().Fatal("upgrade pause all jobs failed", zap.Error(err))
logutil.BgLogger().Fatal("[upgrading] pause all jobs failed", zap.Error(err))
}
logutil.BgLogger().Warn("upgrade pause all jobs failed", zap.Error(err))
logutil.BgLogger().Warn("[upgrading] pause all jobs failed", zap.Error(err))
time.Sleep(interval)
}
logutil.BgLogger().Info("upgrade update global state to upgrading", zap.String("state", syncer.StateUpgrading))
logutil.BgLogger().Info("[upgrading] update global state to upgrading", zap.String("state", syncer.StateUpgrading))
}

// syncNormalRunning resumes jobs through ddl.ResumeAllJobsBySystem and then
// updates the global state to syncer.StateNormalRunning via the domain's DDL
// state syncer. Failures while resuming are only logged as warnings; a failure
// to update the global state is logged with Fatal.
//
// NOTE(review): this listing looks like a diff rendering with the +/- markers
// stripped, so each pair of adjacent, near-identical log calls below is
// presumably the removed line followed by its replacement — confirm against
// the real file before relying on it.
func syncNormalRunning(s Session) {
jobErrs, err := ddl.ResumeAllJobsBySystem(s)
if err != nil {
logutil.BgLogger().Warn("[upgrading] unexpected error to resume all paused jobs: ", zap.Error(err))
logutil.BgLogger().Warn("[upgrading] resume all paused jobs failed", zap.Error(err))
}

// Errors returned in jobErrs are logged one by one; none of them stops the
// function, so the global state update below is still attempted.
for _, e := range jobErrs {
logutil.BgLogger().Warn("[upgrading] unable to resume the job, error: ", zap.Error(e))
logutil.BgLogger().Warn("[upgrading] resume the job failed ", zap.Error(e))
}

// Bound the global state update with a 3-second timeout.
ctx, cancelFunc := context.WithTimeout(context.Background(), 3*time.Second)
defer cancelFunc()
dom := domain.GetDomain(s)
err = dom.DDL().StateSyncer().UpdateGlobalState(ctx, syncer.NewStateInfo(syncer.StateNormalRunning))
if err != nil {
logutil.BgLogger().Fatal("upgrade update global state failed", zap.String("state", syncer.StateNormalRunning), zap.Error(err))
logutil.BgLogger().Fatal("[upgrading] update global state to normal failed", zap.Error(err))
}
logutil.BgLogger().Info("upgrade update global state to normal running finished", zap.String("state", syncer.StateNormalRunning))
logutil.BgLogger().Info("[upgrading] update global state to normal running finished")
}

// checkOwnerVersion is used to wait the DDL owner to be elected in the cluster and check it is the same version as this TiDB.
Expand Down

0 comments on commit 52130f3

Please sign in to comment.