From 000de0545d80a5b146f813f6b13bb6efb7b3d88c Mon Sep 17 00:00:00 2001 From: John Rushford Date: Tue, 7 Dec 2021 00:20:24 +0000 Subject: [PATCH] backout down parent retry limiting in parent selection and nexthop strategies. --- mgmt/RecordsConfig.cc | 2 - proxy/ParentConsistentHash.cc | 19 ++--- proxy/ParentRoundRobin.cc | 17 ++-- proxy/ParentSelection.cc | 15 ---- proxy/ParentSelection.h | 83 -------------------- proxy/ParentSelectionStrategy.cc | 1 - proxy/http/HttpTransact.cc | 13 +-- proxy/http/remap/NextHopConsistentHash.cc | 19 ++--- proxy/http/remap/NextHopHealthStatus.cc | 22 ------ proxy/http/remap/NextHopRoundRobin.cc | 13 ++- proxy/http/remap/NextHopSelectionStrategy.cc | 21 +---- proxy/http/remap/NextHopSelectionStrategy.h | 8 -- 12 files changed, 31 insertions(+), 202 deletions(-) diff --git a/mgmt/RecordsConfig.cc b/mgmt/RecordsConfig.cc index 0b83fb1c4ff..aafece2999a 100644 --- a/mgmt/RecordsConfig.cc +++ b/mgmt/RecordsConfig.cc @@ -441,8 +441,6 @@ static const RecordElement RecordsConfig[] = //# the retry window for the parent to be marked down {RECT_CONFIG, "proxy.config.http.parent_proxy.fail_threshold", RECD_INT, "10", RECU_DYNAMIC, RR_NULL, RECC_NULL, nullptr, RECA_NULL} , - {RECT_CONFIG, "proxy.config.http.parent_proxy.max_trans_retries", RECD_INT, "2", RECU_DYNAMIC, RR_NULL, RECC_NULL, nullptr, RECA_NULL} - , {RECT_CONFIG, "proxy.config.http.parent_proxy.total_connect_attempts", RECD_INT, "4", RECU_DYNAMIC, RR_NULL, RECC_NULL, nullptr, RECA_NULL} , {RECT_CONFIG, "proxy.config.http.parent_proxy.per_parent_connect_attempts", RECD_INT, "2", RECU_DYNAMIC, RR_NULL, RECC_NULL, nullptr, RECA_NULL} diff --git a/proxy/ParentConsistentHash.cc b/proxy/ParentConsistentHash.cc index 549cb8f0277..91f0a301366 100644 --- a/proxy/ParentConsistentHash.cc +++ b/proxy/ParentConsistentHash.cc @@ -235,17 +235,13 @@ ParentConsistentHash::selectParent(bool first_call, ParentResult *result, Reques Debug("parent_select", "Parent.failedAt = %u, retry = %u, xact_start = %u", static_cast(pRec->failedAt.load()), static_cast(retry_time), static_cast(request_info->xact_start)); if ((pRec->failedAt.load() + retry_time) < request_info->xact_start) { - if (pRec->retriers.inc(max_retriers)) { - parentRetry = true; - // make sure that the proper state is recorded in the result structure - result->last_parent = pRec->idx; - result->last_lookup = last_lookup; - result->retry = parentRetry; - result->result = PARENT_SPECIFIED; - Debug("parent_select", "Down parent %s is now retryable, retriers = %d, max_retriers = %d", pRec->hostname, - pRec->retriers(), max_retriers); - break; - } + parentRetry = true; + // make sure that the proper state is recorded in the result structure + result->last_parent = pRec->idx; + result->last_lookup = last_lookup; + result->retry = parentRetry; + result->result = PARENT_SPECIFIED; + break; } } Debug("parent_select", "wrap_around[PRIMARY]: %d, wrap_around[SECONDARY]: %d", wrap_around[PRIMARY], wrap_around[SECONDARY]); @@ -393,7 +389,6 @@ ParentConsistentHash::markParentUp(ParentResult *result) pRec->failedAt = static_cast(0); int old_count = pRec->failCount.exchange(0, std::memory_order_relaxed); - pRec->retriers.clear(); if (old_count > 0) { Note("http parent proxy %s:%d restored", pRec->hostname, pRec->port); diff --git a/proxy/ParentRoundRobin.cc b/proxy/ParentRoundRobin.cc index 2aba8d98bf5..ede7d8d4d76 100644 --- a/proxy/ParentRoundRobin.cc +++ b/proxy/ParentRoundRobin.cc @@ -157,16 +157,13 @@ ParentRoundRobin::selectParent(bool first_call, ParentResult *result, RequestDat } else { if ((result->wrap_around) || (((parents[cur_index].failedAt + retry_time) < request_info->xact_start) && host_stat == TS_HOST_STATUS_UP)) { - if (parents[cur_index].retriers.inc(max_retriers)) { - Debug("parent_select", - "Parent[%d].failedAt = %u, retry = %u, retriers = %d, max_retriers = %u, xact_start = %" PRId64 " but wrap = %d", - cur_index, static_cast(parents[cur_index].failedAt.load()), retry_time, parents[cur_index].retriers(), - max_retriers, static_cast(request_info->xact_start), result->wrap_around); - // Reuse the parent - parentUp = true; - parentRetry = true; - Debug("parent_select", "Parent marked for retry %s:%d", parents[cur_index].hostname, parents[cur_index].port); - } + Debug("parent_select", "Parent[%d].failedAt = %u, retry = %u, xact_start = %" PRId64 " but wrap = %d", cur_index, + static_cast(parents[cur_index].failedAt.load()), retry_time, static_cast(request_info->xact_start), + result->wrap_around); + // Reuse the parent + parentUp = true; + parentRetry = true; + Debug("parent_select", "Parent marked for retry %s:%d", parents[cur_index].hostname, parents[cur_index].port); } else { parentUp = false; } diff --git a/proxy/ParentSelection.cc b/proxy/ParentSelection.cc index 48de7ab548a..f06d28f733d 100644 --- a/proxy/ParentSelection.cc +++ b/proxy/ParentSelection.cc @@ -539,7 +539,6 @@ ParentRecord::ProcessParents(char *val, bool isPrimary) this->parents[i].name = this->parents[i].hostname; this->parents[i].available = true; this->parents[i].weight = weight; - this->parents[i].retriers.clear(); if (tmp3) { memcpy(this->parents[i].hash_string, tmp3 + 1, strlen(tmp3)); this->parents[i].name = this->parents[i].hash_string; @@ -555,7 +554,6 @@ ParentRecord::ProcessParents(char *val, bool isPrimary) this->secondary_parents[i].name = this->secondary_parents[i].hostname; this->secondary_parents[i].available = true; this->secondary_parents[i].weight = weight; - this->secondary_parents[i].retriers.clear(); if (tmp3) { memcpy(this->secondary_parents[i].hash_string, tmp3 + 1, strlen(tmp3)); this->secondary_parents[i].name = this->secondary_parents[i].hash_string; @@ -1839,9 +1837,6 @@ EXCLUSIVE_REGRESSION_TEST(PARENTSELECTION)(RegressionTest * /* t ATS_UNUSED */, FP; RE(verify(result, PARENT_SPECIFIED, "carol", 80), 211); - // max_retriers tests - SET_MAX_RETRIERS(1); - // Test 212 tbl[0] = '\0'; ST(212); @@ -1862,16 +1857,6 @@ EXCLUSIVE_REGRESSION_TEST(PARENTSELECTION)(RegressionTest * /* t ATS_UNUSED */, FP; RE(verify(result, PARENT_SPECIFIED, "minnie", 80), 213); - // Test 214 - // goofy gets chosen because max_retriers was set to 1 - // and goofy becomes available. - sleep(params->policy.ParentRetryTime + 3); - ST(214); - REINIT; - br(request, "i.am.mouse.com"); - FP; - RE(verify(result, PARENT_SPECIFIED, "goofy", 80), 214); - delete request; delete result; delete params; diff --git a/proxy/ParentSelection.h b/proxy/ParentSelection.h index 8b7748f3ea7..8fcbedc9701 100644 --- a/proxy/ParentSelection.h +++ b/proxy/ParentSelection.h @@ -105,64 +105,7 @@ struct SimpleRetryResponseCodes { private: std::vector codes; }; -// class pRetriers -// -// Count of retriers with atomic read, increment, and decrement. -// -class pRetriers -{ -public: - int - operator()() const - { - return _v.load(); - } - - void - clear() - { - _v = 0; - } - - bool - inc(int max_retriers) - { - ink_assert(max_retriers > 0); - - int r = _v.load(std::memory_order_relaxed); - while (r < max_retriers) { - if (_v.compare_exchange_weak(r, r + 1)) { - return true; - } - } - return false; - } - - void - dec() - { - int r = _v.load(std::memory_order_relaxed); - while (r > 0) { - if (_v.compare_exchange_weak(r, r - 1)) { - break; - } - } - } - - pRetriers() = default; - pRetriers(pRetriers const &o) { _v = o._v.load(); } - - pRetriers & - operator=(pRetriers const &o) - { - _v = o._v.load(); - return *this; - } - -private: - std::atomic _v = 0; -}; // struct pRecord // // A record for an individual parent @@ -178,13 +121,6 @@ struct pRecord : ATSConsistentHashNode { int idx; float weight; char hash_string[MAXDNAME + 1]; - pRetriers retriers; - - void - retryComplete() - { - retriers.dec(); - } }; typedef ControlMatcher P_table; @@ -424,16 +360,6 @@ class ParentSelectionStrategy // virtual destructor. virtual ~ParentSelectionStrategy(){}; - - void - retryComplete(ParentResult *result) - { - pRecord *p = getParents(result); - uint32_t n = numParents(result); - if (p != nullptr && result->last_parent < n) { - p[result->last_parent].retryComplete(); - } - } }; class ParentConfigParams : public ConfigInfo @@ -486,15 +412,6 @@ class ParentConfigParams : public ConfigInfo } } - void - retryComplete(ParentResult *result) - { - if (!result->is_api_result()) { - ink_release_assert(result != nullptr); - result->rec->selection_strategy->retryComplete(result); - } - } - P_table *parent_table; ParentRecord *DefaultParent; ParentSelectionPolicy policy; diff --git a/proxy/ParentSelectionStrategy.cc b/proxy/ParentSelectionStrategy.cc index f82675cff4e..f3101ccc01e 100644 --- a/proxy/ParentSelectionStrategy.cc +++ b/proxy/ParentSelectionStrategy.cc @@ -123,7 +123,6 @@ ParentSelectionStrategy::markParentUp(ParentResult *result) pRec->failedAt = static_cast(0); int old_count = pRec->failCount.exchange(0, std::memory_order_relaxed); // a retry succeeded, just reset retriers - pRec->retriers.clear(); if (old_count > 0) { Note("http parent proxy %s:%d restored", pRec->hostname, pRec->port); diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc index ac8a1df1406..bb277fa19b9 100644 --- a/proxy/http/HttpTransact.cc +++ b/proxy/http/HttpTransact.cc @@ -286,17 +286,6 @@ nextParent(HttpTransact::State *s) } } -inline static void -retryComplete(HttpTransact::State *s) -{ - url_mapping *mp = s->url_map.getMapping(); - if (mp && mp->strategy) { - mp->strategy->retryComplete(reinterpret_cast(s->state_machine), s->parent_result.hostname, s->parent_result.port); - } else if (s->parent_params) { - s->parent_params->retryComplete(&s->parent_result); - } -} - inline static bool is_localhost(const char *name, int len) { @@ -3637,7 +3626,7 @@ HttpTransact::handle_response_from_parent(State *s) // if this parent was retried from a markdown, then // notify that the retry has completed. if (s->parent_result.retry) { - retryComplete(s); + markParentUp(s); } simple_or_unavailable_server_retry(s); diff --git a/proxy/http/remap/NextHopConsistentHash.cc b/proxy/http/remap/NextHopConsistentHash.cc index 33b0dea8fe1..b94bb1e15f9 100644 --- a/proxy/http/remap/NextHopConsistentHash.cc +++ b/proxy/http/remap/NextHopConsistentHash.cc @@ -366,18 +366,13 @@ NextHopConsistentHash::findNextHop(TSHttpTxn txnp, void *ih, time_t now) if (!pRec->available.load() && host_stat == TS_HOST_STATUS_UP) { _now == 0 ? _now = time(nullptr) : _now = now; if ((pRec->failedAt.load() + retry_time) < static_cast(_now)) { - NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] next hop %s, retriers: %d", sm_id, pRec->hostname.c_str(), pRec->retriers()); - if (pRec->retriers.inc(max_retriers)) { - nextHopRetry = true; - result.last_parent = pRec->host_index; - result.last_lookup = pRec->group_index; - result.retry = nextHopRetry; - result.result = PARENT_SPECIFIED; - NH_Debug(NH_DEBUG_TAG, - "[%" PRIu64 "] next hop %s is now retryable, marked it available, retriers: %d, max_retriers: %d.", sm_id, - pRec->hostname.c_str(), pRec->retriers(), max_retriers); - break; - } + nextHopRetry = true; + result.last_parent = pRec->host_index; + result.last_lookup = pRec->group_index; + result.retry = nextHopRetry; + result.result = PARENT_SPECIFIED; + NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] next hop %s is now retryable", sm_id, pRec->hostname.c_str()); + break; } } diff --git a/proxy/http/remap/NextHopHealthStatus.cc b/proxy/http/remap/NextHopHealthStatus.cc index 19175d02bdb..b553b951040 100644 --- a/proxy/http/remap/NextHopHealthStatus.cc +++ b/proxy/http/remap/NextHopHealthStatus.cc @@ -148,25 +148,3 @@ NextHopHealthStatus::markNextHop(TSHttpTxn txn, const char *hostname, const int break; } } - -void -NextHopHealthStatus::retryComplete(TSHttpTxn txn, const char *hostname, const int port) -{ - HttpSM *sm = reinterpret_cast(txn); - ParentResult result = sm->t_state.parent_result; - int64_t sm_id = sm->sm_id; - - // make sure we're called back with a result structure for a parent that is being retried. - if (result.result != PARENT_SPECIFIED && !result.retry) { - return; - } - - const std::string host_port = HostRecord::makeHostPort(hostname, port); - auto iter = host_map.find(host_port); - if (iter == host_map.end()) { - NH_Debug(NH_DEBUG_TAG, "[%" PRId64 "] no host named %s found in host_map", sm_id, host_port.c_str()); - return; - } - - iter->second->retriers.dec(); -} diff --git a/proxy/http/remap/NextHopRoundRobin.cc b/proxy/http/remap/NextHopRoundRobin.cc index c95da642181..f2e872553e6 100644 --- a/proxy/http/remap/NextHopRoundRobin.cc +++ b/proxy/http/remap/NextHopRoundRobin.cc @@ -150,14 +150,11 @@ NextHopRoundRobin::findNextHop(TSHttpTxn txnp, void *ih, time_t now) _now == 0 ? _now = time(nullptr) : _now = now; if (((result->wrap_around) || (cur_host->failedAt + retry_time) < static_cast(_now)) && host_stat == TS_HOST_STATUS_UP) { - if (cur_host->retriers.inc(max_retriers)) { - // Reuse the parent - parentUp = true; - parentRetry = true; - NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] NextHop marked for retry %s:%d, max_retriers: %d, retriers: %d", sm_id, - cur_host->hostname.c_str(), host_groups[cur_grp_index][cur_hst_index]->getPort(scheme), max_retriers, - cur_host->retriers()); - } + // Reuse the parent + parentUp = true; + parentRetry = true; + NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] NextHop marked for retry %s:%d", sm_id, cur_host->hostname.c_str(), + host_groups[cur_grp_index][cur_hst_index]->getPort(scheme)); } else { // not retryable or available. parentUp = false; } diff --git a/proxy/http/remap/NextHopSelectionStrategy.cc b/proxy/http/remap/NextHopSelectionStrategy.cc index 1ff465cb8f0..f3b507c3064 100644 --- a/proxy/http/remap/NextHopSelectionStrategy.cc +++ b/proxy/http/remap/NextHopSelectionStrategy.cc @@ -43,17 +43,10 @@ constexpr const char *policy_strings[] = {"NH_UNDEFINED", "NH_FIRST_LIVE", "NH_R NextHopSelectionStrategy::NextHopSelectionStrategy(const std::string_view &name, const NHPolicyType &policy) { - int _max_retriers = 0; - strategy_name = name; - policy_type = policy; - NH_GetConfig(_max_retriers, "proxy.config.http.parent_proxy.max_trans_retries"); - - // config settings may not be available when running unit tests. - // so use the max_retriers default setting. - if (_max_retriers > 0) { - max_retriers = _max_retriers; - } - NH_Debug(NH_DEBUG_TAG, "Using a selection strategy of type %s, max_retriers: %d", policy_strings[policy], max_retriers); + strategy_name = name; + policy_type = policy; + + NH_Debug(NH_DEBUG_TAG, "Using a selection strategy of type %s", policy_strings[policy]); } // @@ -316,12 +309,6 @@ NextHopSelectionStrategy::responseIsRetryable(int64_t sm_id, HttpTransact::Curre return PARENT_RETRY_NONE; } -void -NextHopSelectionStrategy::retryComplete(TSHttpTxn txnp, const char *hostname, const int port) -{ - return passive_health.retryComplete(txnp, hostname, port); -} - namespace YAML { template <> struct convert { diff --git a/proxy/http/remap/NextHopSelectionStrategy.h b/proxy/http/remap/NextHopSelectionStrategy.h index 26a06068914..628c01d55f8 100644 --- a/proxy/http/remap/NextHopSelectionStrategy.h +++ b/proxy/http/remap/NextHopSelectionStrategy.h @@ -53,7 +53,6 @@ struct NHHealthStatus { virtual bool isNextHopAvailable(TSHttpTxn txn, const char *hostname, const int port, void *ih = nullptr) = 0; virtual void markNextHop(TSHttpTxn txn, const char *hostname, const int port, const NHCmd status, void *ih = nullptr, const time_t now = 0) = 0; - virtual void retryComplete(TSHttpTxn txn, const char *hostname, const int port) = 0; virtual ~NHHealthStatus() {} }; @@ -116,7 +115,6 @@ struct HostRecord : ATSConsistentHashNode { int group_index; bool self = false; std::vector> protocols; - pRetriers retriers; // construct without locking the _mutex. HostRecord() @@ -145,7 +143,6 @@ struct HostRecord : ATSConsistentHashNode { group_index = -1; available = true; protocols = o.protocols; - retriers = o.retriers; } // assign without copying the _mutex. @@ -162,7 +159,6 @@ struct HostRecord : ATSConsistentHashNode { group_index = o.group_index; available = o.available.load(); protocols = o.protocols; - retriers = o.retriers; return *this; } @@ -174,7 +170,6 @@ struct HostRecord : ATSConsistentHashNode { std::lock_guard lock(_mutex); failedAt = time(nullptr); available = false; - retriers.clear(); } } @@ -188,7 +183,6 @@ struct HostRecord : ATSConsistentHashNode { failCount = 0; upAt = time(nullptr); available = true; - retriers.clear(); } } @@ -225,7 +219,6 @@ class NextHopHealthStatus : public NHHealthStatus bool isNextHopAvailable(TSHttpTxn txn, const char *hostname, const int port, void *ih = nullptr) override; void markNextHop(TSHttpTxn txn, const char *hostname, const int port, const NHCmd status, void *ih = nullptr, const time_t now = 0) override; - void retryComplete(TSHttpTxn txn, const char *hostname, const int port) override; NextHopHealthStatus(){}; private: @@ -268,5 +261,4 @@ class NextHopSelectionStrategy uint32_t hst_index = 0; uint32_t num_parents = 0; uint32_t distance = 0; // index into the strategies list. - int max_retriers = 1; };