Skip to content

Commit bc17d30

Browse files
jkedgarFacebook Github Bot
authored and
Facebook Github Bot
committed
Resolve deadlock scenario during shutdown
Summary: The code to cleanup worker jobs was freeing memory while holding a mutex. The destructors invoked by the memory being freed attempted to acquire LOCK_thd_data. Other threads acquired LOCK_thd_data first and then the mutex being held by the cleanup code. Change the cleanup worker jobs function to do all the freeing of memory after releasing the mutex. Reviewed By: anirbanr-fb Differential Revision: D4251812 fbshipit-source-id: 7056dbb
1 parent 8388b40 commit bc17d30

File tree

1 file changed

+48
-22
lines changed

1 file changed

+48
-22
lines changed

sql/rpl_slave.cc

+48-22
Original file line numberDiff line numberDiff line change
@@ -5245,6 +5245,53 @@ int check_temp_dir(char* tmp_file)
52455245
DBUG_RETURN(0);
52465246
}
52475247

5248+
static std::pair<ulong, ulonglong> cleanup_worker_jobs(Slave_worker *w)
5249+
{
5250+
ulong ii= 0;
5251+
ulong current_event_index;
5252+
ulong purge_cnt= 0;
5253+
ulonglong purge_size= 0;
5254+
struct slave_job_item job_item;
5255+
std::vector<Log_event*> log_event_free_list;
5256+
5257+
mysql_mutex_lock(&w->jobs_lock);
5258+
5259+
log_event_free_list.reserve(w->jobs.avail);
5260+
5261+
current_event_index = std::max(w->last_current_event_index,
5262+
w->current_event_index);
5263+
while (de_queue(&w->jobs, &job_item))
5264+
{
5265+
DBUG_ASSERT(job_item.data);
5266+
5267+
Log_event* log_event= static_cast<Log_event*>(job_item.data);
5268+
5269+
ii++;
5270+
if (ii > current_event_index)
5271+
{
5272+
purge_size += log_event->data_written;
5273+
purge_cnt++;
5274+
}
5275+
5276+
// Save the freeing for outside the mutex
5277+
log_event_free_list.push_back(log_event);
5278+
}
5279+
5280+
DBUG_ASSERT(w->jobs.len == 0);
5281+
5282+
mysql_mutex_unlock(&w->jobs_lock);
5283+
5284+
// Do all the freeing outside the mutex since freeing causes destructors to
5285+
// be called and some destructors acquire locks which can cause deadlock
5286+
// scenarios if we are holding this mutex.
5287+
for (Log_event* log_event : log_event_free_list)
5288+
{
5289+
delete log_event;
5290+
}
5291+
5292+
// Return the number and size of the purged events
5293+
return std::make_pair(purge_cnt, purge_size);
5294+
}
52485295
/*
52495296
Worker thread for the parallel execution of the replication events.
52505297
*/
@@ -5257,9 +5304,6 @@ pthread_handler_t handle_slave_worker(void *arg)
52575304
Relay_log_info* rli= w->c_rli;
52585305
ulong purge_cnt= 0;
52595306
ulonglong purge_size= 0;
5260-
ulong current_event_index = 0;
5261-
ulong i = 0;
5262-
struct slave_job_item _item, *job_item= &_item;
52635307
/* Buffer lifetime extends across the entire runtime of the THD handle. */
52645308
char proc_info_buf[256]= {0};
52655309

@@ -5321,25 +5365,7 @@ pthread_handler_t handle_slave_worker(void *arg)
53215365
thd->clear_error();
53225366
w->cleanup_context(thd, error);
53235367

5324-
mysql_mutex_lock(&w->jobs_lock);
5325-
5326-
current_event_index = max(w->last_current_event_index,
5327-
w->current_event_index);
5328-
while(de_queue(&w->jobs, job_item))
5329-
{
5330-
i++;
5331-
if (i > current_event_index)
5332-
{
5333-
purge_size += ((Log_event*) (job_item->data))->data_written;
5334-
purge_cnt++;
5335-
}
5336-
DBUG_ASSERT(job_item->data);
5337-
delete static_cast<Log_event*>(job_item->data);
5338-
}
5339-
5340-
DBUG_ASSERT(w->jobs.len == 0);
5341-
5342-
mysql_mutex_unlock(&w->jobs_lock);
5368+
std::tie(purge_cnt, purge_size)= cleanup_worker_jobs(w);
53435369

53445370
mysql_mutex_lock(&rli->pending_jobs_lock);
53455371
rli->pending_jobs -= purge_cnt;

0 commit comments

Comments
 (0)