Skip to content

Commit 3e8f2eb

Browse files
authored
backout down parent retry limiting in parent selection and nexthop (#8546)
strategies.
1 parent 42ad946 commit 3e8f2eb

12 files changed

+31
-202
lines changed

mgmt/RecordsConfig.cc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -441,8 +441,6 @@ static const RecordElement RecordsConfig[] =
441441
//# the retry window for the parent to be marked down
442442
{RECT_CONFIG, "proxy.config.http.parent_proxy.fail_threshold", RECD_INT, "10", RECU_DYNAMIC, RR_NULL, RECC_NULL, nullptr, RECA_NULL}
443443
,
444-
{RECT_CONFIG, "proxy.config.http.parent_proxy.max_trans_retries", RECD_INT, "2", RECU_DYNAMIC, RR_NULL, RECC_NULL, nullptr, RECA_NULL}
445-
,
446444
{RECT_CONFIG, "proxy.config.http.parent_proxy.total_connect_attempts", RECD_INT, "4", RECU_DYNAMIC, RR_NULL, RECC_NULL, nullptr, RECA_NULL}
447445
,
448446
{RECT_CONFIG, "proxy.config.http.parent_proxy.per_parent_connect_attempts", RECD_INT, "2", RECU_DYNAMIC, RR_NULL, RECC_NULL, nullptr, RECA_NULL}

proxy/ParentConsistentHash.cc

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -235,17 +235,13 @@ ParentConsistentHash::selectParent(bool first_call, ParentResult *result, Reques
235235
Debug("parent_select", "Parent.failedAt = %u, retry = %u, xact_start = %u", static_cast<unsigned>(pRec->failedAt.load()),
236236
static_cast<unsigned>(retry_time), static_cast<unsigned>(request_info->xact_start));
237237
if ((pRec->failedAt.load() + retry_time) < request_info->xact_start) {
238-
if (pRec->retriers.inc(max_retriers)) {
239-
parentRetry = true;
240-
// make sure that the proper state is recorded in the result structure
241-
result->last_parent = pRec->idx;
242-
result->last_lookup = last_lookup;
243-
result->retry = parentRetry;
244-
result->result = PARENT_SPECIFIED;
245-
Debug("parent_select", "Down parent %s is now retryable, retriers = %d, max_retriers = %d", pRec->hostname,
246-
pRec->retriers(), max_retriers);
247-
break;
248-
}
238+
parentRetry = true;
239+
// make sure that the proper state is recorded in the result structure
240+
result->last_parent = pRec->idx;
241+
result->last_lookup = last_lookup;
242+
result->retry = parentRetry;
243+
result->result = PARENT_SPECIFIED;
244+
break;
249245
}
250246
}
251247
Debug("parent_select", "wrap_around[PRIMARY]: %d, wrap_around[SECONDARY]: %d", wrap_around[PRIMARY], wrap_around[SECONDARY]);
@@ -393,7 +389,6 @@ ParentConsistentHash::markParentUp(ParentResult *result)
393389

394390
pRec->failedAt = static_cast<time_t>(0);
395391
int old_count = pRec->failCount.exchange(0, std::memory_order_relaxed);
396-
pRec->retriers.clear();
397392

398393
if (old_count > 0) {
399394
Note("http parent proxy %s:%d restored", pRec->hostname, pRec->port);

proxy/ParentRoundRobin.cc

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -157,16 +157,13 @@ ParentRoundRobin::selectParent(bool first_call, ParentResult *result, RequestDat
157157
} else {
158158
if ((result->wrap_around) ||
159159
(((parents[cur_index].failedAt + retry_time) < request_info->xact_start) && host_stat == TS_HOST_STATUS_UP)) {
160-
if (parents[cur_index].retriers.inc(max_retriers)) {
161-
Debug("parent_select",
162-
"Parent[%d].failedAt = %u, retry = %u, retriers = %d, max_retriers = %u, xact_start = %" PRId64 " but wrap = %d",
163-
cur_index, static_cast<unsigned>(parents[cur_index].failedAt.load()), retry_time, parents[cur_index].retriers(),
164-
max_retriers, static_cast<int64_t>(request_info->xact_start), result->wrap_around);
165-
// Reuse the parent
166-
parentUp = true;
167-
parentRetry = true;
168-
Debug("parent_select", "Parent marked for retry %s:%d", parents[cur_index].hostname, parents[cur_index].port);
169-
}
160+
Debug("parent_select", "Parent[%d].failedAt = %u, retry = %u, xact_start = %" PRId64 " but wrap = %d", cur_index,
161+
static_cast<unsigned>(parents[cur_index].failedAt.load()), retry_time, static_cast<int64_t>(request_info->xact_start),
162+
result->wrap_around);
163+
// Reuse the parent
164+
parentUp = true;
165+
parentRetry = true;
166+
Debug("parent_select", "Parent marked for retry %s:%d", parents[cur_index].hostname, parents[cur_index].port);
170167
} else {
171168
parentUp = false;
172169
}

proxy/ParentSelection.cc

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,6 @@ ParentRecord::ProcessParents(char *val, bool isPrimary)
539539
this->parents[i].name = this->parents[i].hostname;
540540
this->parents[i].available = true;
541541
this->parents[i].weight = weight;
542-
this->parents[i].retriers.clear();
543542
if (tmp3) {
544543
memcpy(this->parents[i].hash_string, tmp3 + 1, strlen(tmp3));
545544
this->parents[i].name = this->parents[i].hash_string;
@@ -555,7 +554,6 @@ ParentRecord::ProcessParents(char *val, bool isPrimary)
555554
this->secondary_parents[i].name = this->secondary_parents[i].hostname;
556555
this->secondary_parents[i].available = true;
557556
this->secondary_parents[i].weight = weight;
558-
this->secondary_parents[i].retriers.clear();
559557
if (tmp3) {
560558
memcpy(this->secondary_parents[i].hash_string, tmp3 + 1, strlen(tmp3));
561559
this->secondary_parents[i].name = this->secondary_parents[i].hash_string;
@@ -1839,9 +1837,6 @@ EXCLUSIVE_REGRESSION_TEST(PARENTSELECTION)(RegressionTest * /* t ATS_UNUSED */,
18391837
FP;
18401838
RE(verify(result, PARENT_SPECIFIED, "carol", 80), 211);
18411839

1842-
// max_retriers tests
1843-
SET_MAX_RETRIERS(1);
1844-
18451840
// Test 212
18461841
tbl[0] = '\0';
18471842
ST(212);
@@ -1862,16 +1857,6 @@ EXCLUSIVE_REGRESSION_TEST(PARENTSELECTION)(RegressionTest * /* t ATS_UNUSED */,
18621857
FP;
18631858
RE(verify(result, PARENT_SPECIFIED, "minnie", 80), 213);
18641859

1865-
// Test 214
1866-
// goofy gets chosen because max_retriers was set to 1
1867-
// and goofy becomes available.
1868-
sleep(params->policy.ParentRetryTime + 3);
1869-
ST(214);
1870-
REINIT;
1871-
br(request, "i.am.mouse.com");
1872-
FP;
1873-
RE(verify(result, PARENT_SPECIFIED, "goofy", 80), 214);
1874-
18751860
delete request;
18761861
delete result;
18771862
delete params;

proxy/ParentSelection.h

Lines changed: 0 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -105,64 +105,7 @@ struct SimpleRetryResponseCodes {
105105
private:
106106
std::vector<int> codes;
107107
};
108-
// class pRetriers
109-
//
110-
// Count of retriers with atomic read, increment, and decrement.
111-
//
112-
class pRetriers
113-
{
114-
public:
115-
int
116-
operator()() const
117-
{
118-
return _v.load();
119-
}
120-
121-
void
122-
clear()
123-
{
124-
_v = 0;
125-
}
126-
127-
bool
128-
inc(int max_retriers)
129-
{
130-
ink_assert(max_retriers > 0);
131-
132-
int r = _v.load(std::memory_order_relaxed);
133-
while (r < max_retriers) {
134-
if (_v.compare_exchange_weak(r, r + 1)) {
135-
return true;
136-
}
137-
}
138-
return false;
139-
}
140-
141-
void
142-
dec()
143-
{
144-
int r = _v.load(std::memory_order_relaxed);
145-
while (r > 0) {
146-
if (_v.compare_exchange_weak(r, r - 1)) {
147-
break;
148-
}
149-
}
150-
}
151-
152-
pRetriers() = default;
153108

154-
pRetriers(pRetriers const &o) { _v = o._v.load(); }
155-
156-
pRetriers &
157-
operator=(pRetriers const &o)
158-
{
159-
_v = o._v.load();
160-
return *this;
161-
}
162-
163-
private:
164-
std::atomic<int> _v = 0;
165-
};
166109
// struct pRecord
167110
//
168111
// A record for an individual parent
@@ -178,13 +121,6 @@ struct pRecord : ATSConsistentHashNode {
178121
int idx;
179122
float weight;
180123
char hash_string[MAXDNAME + 1];
181-
pRetriers retriers;
182-
183-
void
184-
retryComplete()
185-
{
186-
retriers.dec();
187-
}
188124
};
189125

190126
typedef ControlMatcher<ParentRecord, ParentResult> P_table;
@@ -424,16 +360,6 @@ class ParentSelectionStrategy
424360

425361
// virtual destructor.
426362
virtual ~ParentSelectionStrategy(){};
427-
428-
void
429-
retryComplete(ParentResult *result)
430-
{
431-
pRecord *p = getParents(result);
432-
uint32_t n = numParents(result);
433-
if (p != nullptr && result->last_parent < n) {
434-
p[result->last_parent].retryComplete();
435-
}
436-
}
437363
};
438364

439365
class ParentConfigParams : public ConfigInfo
@@ -486,15 +412,6 @@ class ParentConfigParams : public ConfigInfo
486412
}
487413
}
488414

489-
void
490-
retryComplete(ParentResult *result)
491-
{
492-
if (!result->is_api_result()) {
493-
ink_release_assert(result != nullptr);
494-
result->rec->selection_strategy->retryComplete(result);
495-
}
496-
}
497-
498415
P_table *parent_table;
499416
ParentRecord *DefaultParent;
500417
ParentSelectionPolicy policy;

proxy/ParentSelectionStrategy.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,6 @@ ParentSelectionStrategy::markParentUp(ParentResult *result)
123123
pRec->failedAt = static_cast<time_t>(0);
124124
int old_count = pRec->failCount.exchange(0, std::memory_order_relaxed);
125125
// a retry succeeded, just reset retriers
126-
pRec->retriers.clear();
127126

128127
if (old_count > 0) {
129128
Note("http parent proxy %s:%d restored", pRec->hostname, pRec->port);

proxy/http/HttpTransact.cc

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -286,17 +286,6 @@ nextParent(HttpTransact::State *s)
286286
}
287287
}
288288
289-
inline static void
290-
retryComplete(HttpTransact::State *s)
291-
{
292-
url_mapping *mp = s->url_map.getMapping();
293-
if (mp && mp->strategy) {
294-
mp->strategy->retryComplete(reinterpret_cast<TSHttpTxn>(s->state_machine), s->parent_result.hostname, s->parent_result.port);
295-
} else if (s->parent_params) {
296-
s->parent_params->retryComplete(&s->parent_result);
297-
}
298-
}
299-
300289
inline static bool
301290
is_localhost(const char *name, int len)
302291
{
@@ -3637,7 +3626,7 @@ HttpTransact::handle_response_from_parent(State *s)
36373626
// if this parent was retried from a markdown, then
36383627
// notify that the retry has completed.
36393628
if (s->parent_result.retry) {
3640-
retryComplete(s);
3629+
markParentUp(s);
36413630
}
36423631

36433632
simple_or_unavailable_server_retry(s);

proxy/http/remap/NextHopConsistentHash.cc

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -366,18 +366,13 @@ NextHopConsistentHash::findNextHop(TSHttpTxn txnp, void *ih, time_t now)
366366
if (!pRec->available.load() && host_stat == TS_HOST_STATUS_UP) {
367367
_now == 0 ? _now = time(nullptr) : _now = now;
368368
if ((pRec->failedAt.load() + retry_time) < static_cast<unsigned>(_now)) {
369-
NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] next hop %s, retriers: %d", sm_id, pRec->hostname.c_str(), pRec->retriers());
370-
if (pRec->retriers.inc(max_retriers)) {
371-
nextHopRetry = true;
372-
result.last_parent = pRec->host_index;
373-
result.last_lookup = pRec->group_index;
374-
result.retry = nextHopRetry;
375-
result.result = PARENT_SPECIFIED;
376-
NH_Debug(NH_DEBUG_TAG,
377-
"[%" PRIu64 "] next hop %s is now retryable, marked it available, retriers: %d, max_retriers: %d.", sm_id,
378-
pRec->hostname.c_str(), pRec->retriers(), max_retriers);
379-
break;
380-
}
369+
nextHopRetry = true;
370+
result.last_parent = pRec->host_index;
371+
result.last_lookup = pRec->group_index;
372+
result.retry = nextHopRetry;
373+
result.result = PARENT_SPECIFIED;
374+
NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] next hop %s is now retryable", sm_id, pRec->hostname.c_str());
375+
break;
381376
}
382377
}
383378

proxy/http/remap/NextHopHealthStatus.cc

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -148,25 +148,3 @@ NextHopHealthStatus::markNextHop(TSHttpTxn txn, const char *hostname, const int
148148
break;
149149
}
150150
}
151-
152-
void
153-
NextHopHealthStatus::retryComplete(TSHttpTxn txn, const char *hostname, const int port)
154-
{
155-
HttpSM *sm = reinterpret_cast<HttpSM *>(txn);
156-
ParentResult result = sm->t_state.parent_result;
157-
int64_t sm_id = sm->sm_id;
158-
159-
// make sure we're called back with a result structure for a parent that is being retried.
160-
if (result.result != PARENT_SPECIFIED && !result.retry) {
161-
return;
162-
}
163-
164-
const std::string host_port = HostRecord::makeHostPort(hostname, port);
165-
auto iter = host_map.find(host_port);
166-
if (iter == host_map.end()) {
167-
NH_Debug(NH_DEBUG_TAG, "[%" PRId64 "] no host named %s found in host_map", sm_id, host_port.c_str());
168-
return;
169-
}
170-
171-
iter->second->retriers.dec();
172-
}

proxy/http/remap/NextHopRoundRobin.cc

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -150,14 +150,11 @@ NextHopRoundRobin::findNextHop(TSHttpTxn txnp, void *ih, time_t now)
150150
_now == 0 ? _now = time(nullptr) : _now = now;
151151
if (((result->wrap_around) || (cur_host->failedAt + retry_time) < static_cast<unsigned>(_now)) &&
152152
host_stat == TS_HOST_STATUS_UP) {
153-
if (cur_host->retriers.inc(max_retriers)) {
154-
// Reuse the parent
155-
parentUp = true;
156-
parentRetry = true;
157-
NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] NextHop marked for retry %s:%d, max_retriers: %d, retriers: %d", sm_id,
158-
cur_host->hostname.c_str(), host_groups[cur_grp_index][cur_hst_index]->getPort(scheme), max_retriers,
159-
cur_host->retriers());
160-
}
153+
// Reuse the parent
154+
parentUp = true;
155+
parentRetry = true;
156+
NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] NextHop marked for retry %s:%d", sm_id, cur_host->hostname.c_str(),
157+
host_groups[cur_grp_index][cur_hst_index]->getPort(scheme));
161158
} else { // not retryable or available.
162159
parentUp = false;
163160
}

0 commit comments

Comments
 (0)