Skip to content

Commit e05d9e1

Browse files
shinrichzwoop
authored andcommitted
Fixes to hostDB to avoid event and memory leaks (#6686)
Co-authored-by: Susan Hinrichs <shinrich@verizonmedia.com> (cherry picked from commit 6094492)
1 parent af1f699 commit e05d9e1

File tree

2 files changed

+31
-30
lines changed

2 files changed

+31
-30
lines changed

iocore/hostdb/HostDB.cc

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ HostDBContinuation::Options const HostDBContinuation::DEFAULT_OPTIONS;
4343
int hostdb_enable = true;
4444
int hostdb_migrate_on_demand = true;
4545
int hostdb_lookup_timeout = 30;
46-
int hostdb_insert_timeout = 160;
4746
int hostdb_re_dns_on_reload = false;
4847
int hostdb_ttl_mode = TTL_OBEY;
4948
unsigned int hostdb_round_robin_max_count = 16;
@@ -1175,12 +1174,13 @@ HostDBContinuation::dnsEvent(int event, HostEnt *e)
11751174
timeout = nullptr;
11761175
}
11771176
EThread *thread = mutex->thread_holding;
1178-
if (event == EVENT_INTERVAL) {
1177+
if (event != DNS_EVENT_LOOKUP) {
1178+
// This was an event_interval or an event_immediate
1179+
// Either we timed out, or remove_trigger_pending gave up on us
11791180
if (!action.continuation) {
11801181
// give up on insert, it has been too long
1181-
// remove_trigger_pending_dns will notify and clean up all requests
1182-
// including this one.
1183-
remove_trigger_pending_dns();
1182+
hostDB.pending_dns_for_hash(hash.hash).remove(this);
1183+
hostdb_cont_free(this);
11841184
return EVENT_DONE;
11851185
}
11861186
MUTEX_TRY_LOCK(lock, action.mutex, thread);
@@ -1196,8 +1196,6 @@ HostDBContinuation::dnsEvent(int event, HostEnt *e)
11961196
action.continuation->handleEvent(EVENT_HOST_DB_LOOKUP, nullptr);
11971197
}
11981198
action = nullptr;
1199-
// do not exit yet, wait to see if we can insert into DB
1200-
timeout = thread->schedule_in(this, HRTIME_SECONDS(hostdb_insert_timeout));
12011199
return EVENT_DONE;
12021200
} else {
12031201
bool failed = !e || !e->good;
@@ -1420,37 +1418,38 @@ HostDBContinuation::dnsEvent(int event, HostEnt *e)
14201418
return EVENT_CONT;
14211419
}
14221420

1423-
// We have seen cases were the action.mutex != action.continuation.mutex.
1421+
// We have seen cases were the action.mutex != action.continuation.mutex. However, it seems that case
1422+
// is likely a memory corruption... Thus the introduction of the assert.
14241423
// Since reply_to_cont will call the handler on the action.continuation, it is important that we hold
14251424
// that mutex.
14261425
bool need_to_reschedule = true;
14271426
MUTEX_TRY_LOCK(lock, action.mutex, thread);
14281427
if (lock.is_locked()) {
1429-
need_to_reschedule = !action.cancelled;
14301428
if (!action.cancelled) {
14311429
if (action.continuation->mutex) {
1432-
MUTEX_TRY_LOCK(lock2, action.continuation->mutex, thread);
1433-
if (lock2.is_locked()) {
1434-
reply_to_cont(action.continuation, r, is_srv());
1435-
need_to_reschedule = false;
1436-
}
1437-
} else {
1438-
reply_to_cont(action.continuation, r, is_srv());
1439-
need_to_reschedule = false;
1430+
ink_release_assert(action.continuation->mutex == action.mutex);
14401431
}
1432+
reply_to_cont(action.continuation, r, is_srv());
14411433
}
1434+
need_to_reschedule = false;
14421435
}
1436+
14431437
if (need_to_reschedule) {
14441438
SET_HANDLER((HostDBContHandler)&HostDBContinuation::probeEvent);
1445-
// remove_trigger_pending_dns should kick off the current hostDB too
1446-
// No need to explicitly reschedule
1447-
remove_trigger_pending_dns();
1439+
// Will reschedule on affinity thread or current thread
1440+
timeout = eventProcessor.schedule_in(this, HOST_DB_RETRY_PERIOD);
14481441
return EVENT_CONT;
14491442
}
14501443
}
1444+
1445+
// Clean ourselves up
1446+
hostDB.pending_dns_for_hash(hash.hash).remove(this);
1447+
14511448
// wake up everyone else who is waiting
14521449
remove_trigger_pending_dns();
14531450

1451+
hostdb_cont_free(this);
1452+
14541453
// all done, or at least scheduled to be all done
14551454
//
14561455
return EVENT_DONE;
@@ -1523,12 +1522,17 @@ HostDBContinuation::probeEvent(int /* event ATS_UNUSED */, Event *e)
15231522
ink_assert(!link.prev && !link.next);
15241523
EThread *t = e ? e->ethread : this_ethread();
15251524

1525+
if (timeout) {
1526+
timeout->cancel(this);
1527+
timeout = nullptr;
1528+
}
1529+
15261530
MUTEX_TRY_LOCK(lock, action.mutex, t);
15271531

15281532
// Separating lock checks here to make sure things don't break
15291533
// when we check if the action is cancelled.
15301534
if (!lock.is_locked()) {
1531-
mutex->thread_holding->schedule_in(this, HOST_DB_RETRY_PERIOD);
1535+
timeout = mutex->thread_holding->schedule_in(this, HOST_DB_RETRY_PERIOD);
15321536
return EVENT_CONT;
15331537
}
15341538

@@ -1537,13 +1541,8 @@ HostDBContinuation::probeEvent(int /* event ATS_UNUSED */, Event *e)
15371541
return EVENT_DONE;
15381542
}
15391543

1540-
// Go ahead and grab the continuation mutex or just grab the action mutex again of there is no continuation mutex
1541-
MUTEX_TRY_LOCK(lock2, (action.continuation && action.continuation->mutex) ? action.continuation->mutex : action.mutex, t);
1542-
// Don't continue unless we have both mutexes
1543-
if (!lock2.is_locked()) {
1544-
mutex->thread_holding->schedule_in(this, HOST_DB_RETRY_PERIOD);
1545-
return EVENT_CONT;
1546-
}
1544+
// If the action.continuation->mutex != action.mutex, we have a use after free/realloc
1545+
ink_release_assert(!action.continuation || action.continuation->mutex == action.mutex);
15471546

15481547
if (!hostdb_enable || (!*hash.host_name && !hash.ip.isValid())) {
15491548
if (action.continuation) {
@@ -1619,7 +1618,10 @@ HostDBContinuation::remove_trigger_pending_dns()
16191618
if (!affinity_thread || affinity_thread == thread) {
16201619
c->handleEvent(EVENT_IMMEDIATE, nullptr);
16211620
} else {
1622-
eventProcessor.schedule_imm(c);
1621+
if (c->timeout) {
1622+
c->timeout->cancel();
1623+
}
1624+
c->timeout = eventProcessor.schedule_imm(c);
16231625
}
16241626
}
16251627
}

iocore/hostdb/P_HostDBProcessor.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
extern int hostdb_enable;
3838
extern int hostdb_migrate_on_demand;
3939
extern int hostdb_lookup_timeout;
40-
extern int hostdb_insert_timeout;
4140
extern int hostdb_re_dns_on_reload;
4241

4342
// 0 = obey, 1 = ignore, 2 = min(X,ttl), 3 = max(X,ttl)

0 commit comments

Comments
 (0)