@@ -43,7 +43,6 @@ HostDBContinuation::Options const HostDBContinuation::DEFAULT_OPTIONS;
4343int hostdb_enable = true ;
4444int hostdb_migrate_on_demand = true ;
4545int hostdb_lookup_timeout = 30 ;
46- int hostdb_insert_timeout = 160 ;
4746int hostdb_re_dns_on_reload = false ;
4847int hostdb_ttl_mode = TTL_OBEY;
4948unsigned int hostdb_round_robin_max_count = 16 ;
@@ -1175,12 +1174,13 @@ HostDBContinuation::dnsEvent(int event, HostEnt *e)
11751174 timeout = nullptr ;
11761175 }
11771176 EThread *thread = mutex->thread_holding ;
1178- if (event == EVENT_INTERVAL) {
1177+ if (event != DNS_EVENT_LOOKUP) {
1178+ // This was an event_interval or an event_immediate
1179+ // Either we timed out, or remove_trigger_pending gave up on us
11791180 if (!action.continuation ) {
11801181 // give up on insert, it has been too long
1181- // remove_trigger_pending_dns will notify and clean up all requests
1182- // including this one.
1183- remove_trigger_pending_dns ();
1182+ hostDB.pending_dns_for_hash (hash.hash ).remove (this );
1183+ hostdb_cont_free (this );
11841184 return EVENT_DONE;
11851185 }
11861186 MUTEX_TRY_LOCK (lock, action.mutex , thread);
@@ -1196,8 +1196,6 @@ HostDBContinuation::dnsEvent(int event, HostEnt *e)
11961196 action.continuation ->handleEvent (EVENT_HOST_DB_LOOKUP, nullptr );
11971197 }
11981198 action = nullptr ;
1199- // do not exit yet, wait to see if we can insert into DB
1200- timeout = thread->schedule_in (this , HRTIME_SECONDS (hostdb_insert_timeout));
12011199 return EVENT_DONE;
12021200 } else {
12031201 bool failed = !e || !e->good ;
@@ -1420,37 +1418,38 @@ HostDBContinuation::dnsEvent(int event, HostEnt *e)
14201418 return EVENT_CONT;
14211419 }
14221420
1423- // We have seen cases were the action.mutex != action.continuation.mutex.
1421+ // We have seen cases were the action.mutex != action.continuation.mutex. However, it seems that case
1422+ // is likely a memory corruption... Thus the introduction of the assert.
14241423 // Since reply_to_cont will call the handler on the action.continuation, it is important that we hold
14251424 // that mutex.
14261425 bool need_to_reschedule = true ;
14271426 MUTEX_TRY_LOCK (lock, action.mutex , thread);
14281427 if (lock.is_locked ()) {
1429- need_to_reschedule = !action.cancelled ;
14301428 if (!action.cancelled ) {
14311429 if (action.continuation ->mutex ) {
1432- MUTEX_TRY_LOCK (lock2, action.continuation ->mutex , thread);
1433- if (lock2.is_locked ()) {
1434- reply_to_cont (action.continuation , r, is_srv ());
1435- need_to_reschedule = false ;
1436- }
1437- } else {
1438- reply_to_cont (action.continuation , r, is_srv ());
1439- need_to_reschedule = false ;
1430+ ink_release_assert (action.continuation ->mutex == action.mutex );
14401431 }
1432+ reply_to_cont (action.continuation , r, is_srv ());
14411433 }
1434+ need_to_reschedule = false ;
14421435 }
1436+
14431437 if (need_to_reschedule) {
14441438 SET_HANDLER ((HostDBContHandler)&HostDBContinuation::probeEvent);
1445- // remove_trigger_pending_dns should kick off the current hostDB too
1446- // No need to explicitly reschedule
1447- remove_trigger_pending_dns ();
1439+ // Will reschedule on affinity thread or current thread
1440+ timeout = eventProcessor.schedule_in (this , HOST_DB_RETRY_PERIOD);
14481441 return EVENT_CONT;
14491442 }
14501443 }
1444+
1445+ // Clean ourselves up
1446+ hostDB.pending_dns_for_hash (hash.hash ).remove (this );
1447+
14511448 // wake up everyone else who is waiting
14521449 remove_trigger_pending_dns ();
14531450
1451+ hostdb_cont_free (this );
1452+
14541453 // all done, or at least scheduled to be all done
14551454 //
14561455 return EVENT_DONE;
@@ -1523,12 +1522,17 @@ HostDBContinuation::probeEvent(int /* event ATS_UNUSED */, Event *e)
15231522 ink_assert (!link.prev && !link.next );
15241523 EThread *t = e ? e->ethread : this_ethread ();
15251524
1525+ if (timeout) {
1526+ timeout->cancel (this );
1527+ timeout = nullptr ;
1528+ }
1529+
15261530 MUTEX_TRY_LOCK (lock, action.mutex , t);
15271531
15281532 // Separating lock checks here to make sure things don't break
15291533 // when we check if the action is cancelled.
15301534 if (!lock.is_locked ()) {
1531- mutex->thread_holding ->schedule_in (this , HOST_DB_RETRY_PERIOD);
1535+ timeout = mutex->thread_holding ->schedule_in (this , HOST_DB_RETRY_PERIOD);
15321536 return EVENT_CONT;
15331537 }
15341538
@@ -1537,13 +1541,8 @@ HostDBContinuation::probeEvent(int /* event ATS_UNUSED */, Event *e)
15371541 return EVENT_DONE;
15381542 }
15391543
1540- // Go ahead and grab the continuation mutex or just grab the action mutex again of there is no continuation mutex
1541- MUTEX_TRY_LOCK (lock2, (action.continuation && action.continuation ->mutex ) ? action.continuation ->mutex : action.mutex , t);
1542- // Don't continue unless we have both mutexes
1543- if (!lock2.is_locked ()) {
1544- mutex->thread_holding ->schedule_in (this , HOST_DB_RETRY_PERIOD);
1545- return EVENT_CONT;
1546- }
1544+ // If the action.continuation->mutex != action.mutex, we have a use after free/realloc
1545+ ink_release_assert (!action.continuation || action.continuation ->mutex == action.mutex );
15471546
15481547 if (!hostdb_enable || (!*hash.host_name && !hash.ip .isValid ())) {
15491548 if (action.continuation ) {
@@ -1619,7 +1618,10 @@ HostDBContinuation::remove_trigger_pending_dns()
16191618 if (!affinity_thread || affinity_thread == thread) {
16201619 c->handleEvent (EVENT_IMMEDIATE, nullptr );
16211620 } else {
1622- eventProcessor.schedule_imm (c);
1621+ if (c->timeout ) {
1622+ c->timeout ->cancel ();
1623+ }
1624+ c->timeout = eventProcessor.schedule_imm (c);
16231625 }
16241626 }
16251627}
0 commit comments