diff --git a/proxy/ParentConsistentHash.cc b/proxy/ParentConsistentHash.cc index dbb42eb7577..549cb8f0277 100644 --- a/proxy/ParentConsistentHash.cc +++ b/proxy/ParentConsistentHash.cc @@ -235,7 +235,7 @@ ParentConsistentHash::selectParent(bool first_call, ParentResult *result, Reques Debug("parent_select", "Parent.failedAt = %u, retry = %u, xact_start = %u", static_cast(pRec->failedAt.load()), static_cast(retry_time), static_cast(request_info->xact_start)); if ((pRec->failedAt.load() + retry_time) < request_info->xact_start) { - if (pRec->retriers.fetch_add(1, std::memory_order_relaxed) < max_retriers) { + if (pRec->retriers.inc(max_retriers)) { parentRetry = true; // make sure that the proper state is recorded in the result structure result->last_parent = pRec->idx; @@ -243,10 +243,8 @@ ParentConsistentHash::selectParent(bool first_call, ParentResult *result, Reques result->retry = parentRetry; result->result = PARENT_SPECIFIED; Debug("parent_select", "Down parent %s is now retryable, retriers = %d, max_retriers = %d", pRec->hostname, - pRec->retriers.load(), max_retriers); + pRec->retriers(), max_retriers); break; - } else { - pRec->retriers--; } } } @@ -395,7 +393,7 @@ ParentConsistentHash::markParentUp(ParentResult *result) pRec->failedAt = static_cast(0); int old_count = pRec->failCount.exchange(0, std::memory_order_relaxed); - pRec->retriers = 0; + pRec->retriers.clear(); if (old_count > 0) { Note("http parent proxy %s:%d restored", pRec->hostname, pRec->port); diff --git a/proxy/ParentRoundRobin.cc b/proxy/ParentRoundRobin.cc index a0e80eef8ab..2aba8d98bf5 100644 --- a/proxy/ParentRoundRobin.cc +++ b/proxy/ParentRoundRobin.cc @@ -157,18 +157,15 @@ ParentRoundRobin::selectParent(bool first_call, ParentResult *result, RequestDat } else { if ((result->wrap_around) || (((parents[cur_index].failedAt + retry_time) < request_info->xact_start) && host_stat == TS_HOST_STATUS_UP)) { - if (parents[cur_index].retriers.fetch_add(1, std::memory_order_relaxed) < max_retriers) { + if (parents[cur_index].retriers.inc(max_retriers)) { Debug("parent_select", "Parent[%d].failedAt = %u, retry = %u, retriers = %d, max_retriers = %u, xact_start = %" PRId64 " but wrap = %d", - cur_index, static_cast(parents[cur_index].failedAt.load()), retry_time, - parents[cur_index].retriers.load(), max_retriers, static_cast(request_info->xact_start), - result->wrap_around); + cur_index, static_cast(parents[cur_index].failedAt.load()), retry_time, parents[cur_index].retriers(), + max_retriers, static_cast(request_info->xact_start), result->wrap_around); // Reuse the parent parentUp = true; parentRetry = true; Debug("parent_select", "Parent marked for retry %s:%d", parents[cur_index].hostname, parents[cur_index].port); - } else { - parents[cur_index].retriers--; } } else { parentUp = false; diff --git a/proxy/ParentSelection.cc b/proxy/ParentSelection.cc index f39686aa815..64c2a409843 100644 --- a/proxy/ParentSelection.cc +++ b/proxy/ParentSelection.cc @@ -539,7 +539,7 @@ ParentRecord::ProcessParents(char *val, bool isPrimary) this->parents[i].name = this->parents[i].hostname; this->parents[i].available = true; this->parents[i].weight = weight; - this->parents[i].retriers = 0; + this->parents[i].retriers.clear(); if (tmp3) { memcpy(this->parents[i].hash_string, tmp3 + 1, strlen(tmp3)); this->parents[i].name = this->parents[i].hash_string; @@ -555,7 +555,7 @@ ParentRecord::ProcessParents(char *val, bool isPrimary) this->secondary_parents[i].name = this->secondary_parents[i].hostname; this->secondary_parents[i].available = true; this->secondary_parents[i].weight = weight; - this->secondary_parents[i].retriers = 0; + this->secondary_parents[i].retriers.clear(); if (tmp3) { memcpy(this->secondary_parents[i].hash_string, tmp3 + 1, strlen(tmp3)); this->secondary_parents[i].name = this->secondary_parents[i].hash_string; diff --git a/proxy/ParentSelection.h b/proxy/ParentSelection.h index 82c5b2ea33d..954e19cdb85 100644 --- a/proxy/ParentSelection.h +++ b/proxy/ParentSelection.h @@ -105,11 +105,70 @@ struct SimpleRetryResponseCodes { private: std::vector codes; }; +// class pRetriers +// +// Count of retriers with atomic read, increment, and decrement. +// +class pRetriers +{ +public: + int + operator()() const + { + return _v.load(); + } + + void + clear() + { + _v = 0; + } + + bool + inc(int max_retriers) + { + ink_assert(max_retriers > 0); + + int r = _v.load(std::memory_order_relaxed); + while (r < max_retriers) { + if (_v.compare_exchange_weak(r, r + 1)) { + return true; + } + } + return false; + } + + void + dec() + { + int r = _v.load(std::memory_order_relaxed); + while (r > 0) { + if (_v.compare_exchange_weak(r, r - 1)) { + break; + } + } + } + + pRetriers() = default; + + pRetriers(pRetriers const &o) { _v = o._v.load(); } + + pRetriers & + operator=(pRetriers const &o) + { + _v = o._v.load(); + return *this; + } + +private: + std::atomic _v = 0; +}; // struct pRecord // // A record for an individual parent // struct pRecord : ATSConsistentHashNode { +public: char hostname[MAXDNAME + 1]; int port; std::atomic failedAt = 0; @@ -119,7 +178,13 @@ struct pRecord : ATSConsistentHashNode { int idx; float weight; char hash_string[MAXDNAME + 1]; - std::atomic retriers = 0; + pRetriers retriers; + + void + retryComplete() + { + retriers.dec(); + } }; typedef ControlMatcher P_table; @@ -359,6 +424,16 @@ class ParentSelectionStrategy // virtual destructor. virtual ~ParentSelectionStrategy(){}; + + void + retryComplete(ParentResult *result) + { + pRecord *p = getParents(result); + uint32_t n = numParents(result); + if (p != nullptr && result->last_parent < n) { + p[result->last_parent].retryComplete(); + } + } }; class ParentConfigParams : public ConfigInfo @@ -411,6 +486,15 @@ class ParentConfigParams : public ConfigInfo } } + void + retryComplete(ParentResult *result) + { + if (!result->is_api_result()) { + ink_release_assert(result != nullptr); + result->rec->selection_strategy->retryComplete(result); + } + } + P_table *parent_table; ParentRecord *DefaultParent; ParentSelectionPolicy policy; diff --git a/proxy/ParentSelectionStrategy.cc b/proxy/ParentSelectionStrategy.cc index 1c1c3a7e4e7..f82675cff4e 100644 --- a/proxy/ParentSelectionStrategy.cc +++ b/proxy/ParentSelectionStrategy.cc @@ -65,11 +65,6 @@ ParentSelectionStrategy::markParentDown(ParentResult *result, unsigned int fail_ // must set the count to reflect this if (result->retry == false) { new_fail_count = pRec->failCount = 1; - } else { - // this was a retry that failed, decrement the retriers count - if ((pRec->retriers--) < 0) { - pRec->retriers = 0; - } } Note("Parent %s marked as down %s:%d", (result->retry) ? "retry" : "initially", pRec->hostname, pRec->port); @@ -128,7 +123,7 @@ ParentSelectionStrategy::markParentUp(ParentResult *result) pRec->failedAt = static_cast(0); int old_count = pRec->failCount.exchange(0, std::memory_order_relaxed); // a retry succeeded, just reset retriers - pRec->retriers = 0; + pRec->retriers.clear(); if (old_count > 0) { Note("http parent proxy %s:%d restored", pRec->hostname, pRec->port); diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc index 6e66aa4589c..1dea8c29034 100644 --- a/proxy/http/HttpTransact.cc +++ b/proxy/http/HttpTransact.cc @@ -281,6 +281,17 @@ nextParent(HttpTransact::State *s) } } +inline static void +retryComplete(HttpTransact::State *s) +{ + url_mapping *mp = s->url_map.getMapping(); + if (mp && mp->strategy) { + mp->strategy->retryComplete(reinterpret_cast(s->state_machine), s->parent_result.hostname, s->parent_result.port); + } else if (s->parent_params) { + s->parent_params->retryComplete(&s->parent_result); + } +} + inline static bool is_localhost(const char *name, int len) { @@ -3634,6 +3645,12 @@ HttpTransact::handle_response_from_parent(State *s) TxnDebug("http_trans", "[handle_response_from_parent] (hrfp)"); HTTP_RELEASE_ASSERT(s->current.server == &s->parent_info); + // if this parent was retried from a markdown, then + // notify that the retry has completed. + if (s->parent_result.retry) { + retryComplete(s); + } + simple_or_unavailable_server_retry(s); s->parent_info.state = s->current.state; diff --git a/proxy/http/remap/NextHopConsistentHash.cc b/proxy/http/remap/NextHopConsistentHash.cc index a8943620c84..89bbdca2382 100644 --- a/proxy/http/remap/NextHopConsistentHash.cc +++ b/proxy/http/remap/NextHopConsistentHash.cc @@ -326,7 +326,8 @@ NextHopConsistentHash::findNextHop(TSHttpTxn txnp, void *ih, time_t now) // check if the host is retryable. It's retryable if the retry window has elapsed _now == 0 ? _now = time(nullptr) : _now = now; if ((pRec->failedAt.load() + retry_time) < static_cast(_now)) { - if (pRec->retriers.fetch_add(1, std::memory_order_relaxed) < max_retriers) { + NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] next hop %s, retriers: %d", sm_id, pRec->hostname.c_str(), pRec->retriers()); + if (pRec->retriers.inc(max_retriers)) { nextHopRetry = true; result->last_parent = pRec->host_index; result->last_lookup = pRec->group_index; @@ -334,10 +335,8 @@ NextHopConsistentHash::findNextHop(TSHttpTxn txnp, void *ih, time_t now) result->result = PARENT_SPECIFIED; NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] next hop %s is now retryable, marked it available, retriers: %d, max_retriers: %d.", sm_id, - pRec->hostname.c_str(), pRec->retriers.load(), max_retriers); + pRec->hostname.c_str(), pRec->retriers(), max_retriers); break; - } else { - pRec->retriers--; } } } diff --git a/proxy/http/remap/NextHopHealthStatus.cc b/proxy/http/remap/NextHopHealthStatus.cc index 9dad0cb6a4a..58490aac15a 100644 --- a/proxy/http/remap/NextHopHealthStatus.cc +++ b/proxy/http/remap/NextHopHealthStatus.cc @@ -149,3 +149,25 @@ NextHopHealthStatus::markNextHop(TSHttpTxn txn, const char *hostname, const int break; } } + +void +NextHopHealthStatus::retryComplete(TSHttpTxn txn, const char *hostname, const int port) +{ + HttpSM *sm = reinterpret_cast(txn); + ParentResult result = sm->t_state.parent_result; + int64_t sm_id = sm->sm_id; + + // make sure we're called back with a result structure for a parent that is being retried. + if (result.result != PARENT_SPECIFIED && !result.retry) { + return; + } + + const std::string host_port = HostRecord::makeHostPort(hostname, port); + auto iter = host_map.find(host_port); + if (iter == host_map.end()) { + NH_Debug(NH_DEBUG_TAG, "[%" PRId64 "] no host named %s found in host_map", sm_id, host_port.c_str()); + return; + } + + iter->second->retriers.dec(); +} diff --git a/proxy/http/remap/NextHopRoundRobin.cc b/proxy/http/remap/NextHopRoundRobin.cc index d85885f93b3..c95da642181 100644 --- a/proxy/http/remap/NextHopRoundRobin.cc +++ b/proxy/http/remap/NextHopRoundRobin.cc @@ -150,15 +150,13 @@ NextHopRoundRobin::findNextHop(TSHttpTxn txnp, void *ih, time_t now) _now == 0 ? _now = time(nullptr) : _now = now; if (((result->wrap_around) || (cur_host->failedAt + retry_time) < static_cast(_now)) && host_stat == TS_HOST_STATUS_UP) { - if (cur_host->retriers.fetch_add(1, std::memory_order_relaxed) < max_retriers) { + if (cur_host->retriers.inc(max_retriers)) { // Reuse the parent parentUp = true; parentRetry = true; NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] NextHop marked for retry %s:%d, max_retriers: %d, retriers: %d", sm_id, cur_host->hostname.c_str(), host_groups[cur_grp_index][cur_hst_index]->getPort(scheme), max_retriers, - cur_host->retriers.load()); - } else { - cur_host->retriers--; + cur_host->retriers()); } } else { // not retryable or available. parentUp = false; diff --git a/proxy/http/remap/NextHopSelectionStrategy.cc b/proxy/http/remap/NextHopSelectionStrategy.cc index c57555ee1cc..4bbccbd6dbc 100644 --- a/proxy/http/remap/NextHopSelectionStrategy.cc +++ b/proxy/http/remap/NextHopSelectionStrategy.cc @@ -293,6 +293,12 @@ NextHopSelectionStrategy::responseIsRetryable(int64_t sm_id, HttpTransact::Curre return PARENT_RETRY_NONE; } +void +NextHopSelectionStrategy::retryComplete(TSHttpTxn txnp, const char *hostname, const int port) +{ + return passive_health.retryComplete(txnp, hostname, port); +} + namespace YAML { template <> struct convert { diff --git a/proxy/http/remap/NextHopSelectionStrategy.h b/proxy/http/remap/NextHopSelectionStrategy.h index 0c483a3668b..ca4a1749a79 100644 --- a/proxy/http/remap/NextHopSelectionStrategy.h +++ b/proxy/http/remap/NextHopSelectionStrategy.h @@ -53,6 +53,7 @@ struct NHHealthStatus { virtual bool isNextHopAvailable(TSHttpTxn txn, const char *hostname, const int port, void *ih = nullptr) = 0; virtual void markNextHop(TSHttpTxn txn, const char *hostname, const int port, const NHCmd status, void *ih = nullptr, const time_t now = 0) = 0; + virtual void retryComplete(TSHttpTxn txn, const char *hostname, const int port) = 0; virtual ~NHHealthStatus() {} }; @@ -114,7 +115,7 @@ struct HostRecord : ATSConsistentHashNode { int host_index; int group_index; std::vector> protocols; - std::atomic retriers = 0; + pRetriers retriers; // construct without locking the _mutex. HostRecord() @@ -128,7 +129,6 @@ struct HostRecord : ATSConsistentHashNode { host_index = -1; group_index = -1; available = true; - retriers = 0; } // copy constructor to avoid copying the _mutex. @@ -144,7 +144,7 @@ struct HostRecord : ATSConsistentHashNode { group_index = -1; available = true; protocols = o.protocols; - retriers = o.available.load(); + retriers = o.retriers; } // assign without copying the _mutex. @@ -161,7 +161,7 @@ struct HostRecord : ATSConsistentHashNode { group_index = o.group_index; available = o.available.load(); protocols = o.protocols; - retriers = o.retriers.load(); + retriers = o.retriers; return *this; } @@ -173,9 +173,7 @@ struct HostRecord : ATSConsistentHashNode { std::lock_guard lock(_mutex); failedAt = time(nullptr); available = false; - if (--retriers < 0) { - retriers = 0; - } + retriers.clear(); } } @@ -187,9 +185,9 @@ struct HostRecord : ATSConsistentHashNode { std::lock_guard lock(_mutex); failedAt = 0; failCount = 0; - retriers = 0; upAt = time(nullptr); available = true; + retriers.clear(); } } @@ -226,6 +224,7 @@ class NextHopHealthStatus : public NHHealthStatus bool isNextHopAvailable(TSHttpTxn txn, const char *hostname, const int port, void *ih = nullptr) override; void markNextHop(TSHttpTxn txn, const char *hostname, const int port, const NHCmd status, void *ih = nullptr, const time_t now = 0) override; + void retryComplete(TSHttpTxn txn, const char *hostname, const int port) override; NextHopHealthStatus(){}; private: @@ -246,6 +245,8 @@ class NextHopSelectionStrategy virtual ParentRetry_t responseIsRetryable(int64_t sm_id, HttpTransact::CurrentInfo ¤t_info, HTTPStatus response_code); + void retryComplete(TSHttpTxn txn, const char *hostname, const int port); + std::string strategy_name; bool go_direct = true; bool parent_is_proxy = true;