-
Notifications
You must be signed in to change notification settings - Fork 5.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ddl: fix a bug that MDL may progress unexpectedly or block forever #46921
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -277,7 +277,7 @@ func (d *ddl) startDispatchLoop() { | |
} | ||
if !d.isOwner() { | ||
isOnce = true | ||
d.once.Store(true) | ||
d.onceMap = make(map[int64]struct{}, jobOnceCapacity) | ||
time.Sleep(dispatchLoopWaitingDuration) | ||
continue | ||
} | ||
|
@@ -378,7 +378,7 @@ func (d *ddl) delivery2worker(wk *worker, pool *workerPool, job *model.Job) { | |
metrics.DDLRunningJobCount.WithLabelValues(pool.tp().String()).Dec() | ||
}() | ||
// check if this ddl job is synced to all servers. | ||
if !d.isSynced(job) || d.once.Load() { | ||
if !d.isSynced(job) || !d.maybeAlreadyRunOnce(job.ID) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this cause all jobs to have an extra "waitSchemaSynced"? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After GC the onceMap, the jobs need to do an extra checkMDLInfo. If waitSchemaSynced passed already, checkMDLInfo would return nil for this job. |
||
if variable.EnableMDL.Load() { | ||
exist, version, err := checkMDLInfo(job.ID, d.sessPool) | ||
if err != nil { | ||
|
@@ -393,7 +393,7 @@ func (d *ddl) delivery2worker(wk *worker, pool *workerPool, job *model.Job) { | |
if err != nil { | ||
return | ||
} | ||
d.once.Store(false) | ||
d.setAlreadyRunOnce(job.ID) | ||
cleanMDLInfo(d.sessPool, job.ID, d.etcdCli) | ||
// Don't have a worker now. | ||
return | ||
|
@@ -407,7 +407,7 @@ func (d *ddl) delivery2worker(wk *worker, pool *workerPool, job *model.Job) { | |
pool.put(wk) | ||
return | ||
} | ||
d.once.Store(false) | ||
d.setAlreadyRunOnce(job.ID) | ||
} | ||
} | ||
|
||
|
@@ -426,9 +426,14 @@ func (d *ddl) delivery2worker(wk *worker, pool *workerPool, job *model.Job) { | |
}) | ||
|
||
// Here means the job enters another state (delete only, write only, public, etc...) or is cancelled. | ||
// If the job is done or still running or rolling back, we will wait 2 * lease time to guarantee other servers to update | ||
// If the job is done or still running or rolling back, we will wait 2 * lease time or until MDL is synced to guarantee other servers to update | ||
// the newest schema. | ||
waitSchemaChanged(d.ddlCtx, d.lease*2, schemaVer, job) | ||
err := waitSchemaChanged(d.ddlCtx, d.lease*2, schemaVer, job) | ||
if err != nil { | ||
// May be caused by server closing, shouldn't clean the MDL info. | ||
logutil.BgLogger().Info("wait latest schema version error", zap.String("category", "ddl"), zap.Error(err)) | ||
return | ||
} | ||
cleanMDLInfo(d.sessPool, job.ID, d.etcdCli) | ||
d.synced(job) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we combine the conditions on lines 838 and 840?