-
Notifications
You must be signed in to change notification settings - Fork 16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
RetryingChannel retries 429/503s #350
Merged
Merged
Changes from 8 commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
f860522
RetryingChannel retries 500/503s
iamdanfox 28b240d
Add generated changelog entries
iamdanfox 73c0f60
fix test
iamdanfox c6926fe
Close response body
iamdanfox d40997b
Test capturing response body closing
iamdanfox 5e73ced
new graphs
iamdanfox 18f29fc
LFS url
iamdanfox 28fdcd2
H2 gives us anchor links
iamdanfox 109f1fe
only retry 429 and 503 for now
iamdanfox File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
type: improvement | ||
improvement: | ||
description: RetryingChannel retries 500/503s | ||
links: | ||
- https://github.com/palantir/dialogue/pull/350 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,12 +26,15 @@ | |
import com.palantir.dialogue.Endpoint; | ||
import com.palantir.dialogue.Request; | ||
import com.palantir.dialogue.Response; | ||
import com.palantir.logsafe.exceptions.SafeRuntimeException; | ||
import java.io.IOException; | ||
import java.util.Optional; | ||
import java.util.concurrent.Executor; | ||
import java.util.concurrent.atomic.AtomicInteger; | ||
import java.util.function.Supplier; | ||
import java.util.function.Function; | ||
|
||
/** | ||
* Retries calls to the underlying channel upon failure. | ||
* Immediately retries calls to the underlying channel upon failure. | ||
*/ | ||
final class RetryingChannel implements Channel { | ||
private static final int DEFAULT_MAX_RETRIES = 4; | ||
|
@@ -54,34 +57,69 @@ final class RetryingChannel implements Channel { | |
public ListenableFuture<Response> execute(Endpoint endpoint, Request request) { | ||
SettableFuture<Response> future = SettableFuture.create(); | ||
|
||
Supplier<ListenableFuture<Response>> callSupplier = () -> delegate.execute(endpoint, request); | ||
FutureCallback<Response> retryer = new RetryingCallback<>(callSupplier, future); | ||
Futures.addCallback(callSupplier.get(), retryer, DIRECT_EXECUTOR); | ||
Function<Integer, ListenableFuture<Response>> callSupplier = attempt -> { | ||
// TODO(dfox): include retry number in the request somehow | ||
return delegate.execute(endpoint, request); | ||
}; | ||
FutureCallback<Response> retryer = new RetryingCallback(callSupplier, future); | ||
Futures.addCallback(callSupplier.apply(0), retryer, DIRECT_EXECUTOR); | ||
|
||
return future; | ||
} | ||
|
||
private final class RetryingCallback<T> implements FutureCallback<T> { | ||
private final class RetryingCallback implements FutureCallback<Response> { | ||
private final AtomicInteger failures = new AtomicInteger(0); | ||
private final Supplier<ListenableFuture<T>> runnable; | ||
private final SettableFuture<T> delegate; | ||
private final Function<Integer, ListenableFuture<Response>> runnable; | ||
private final SettableFuture<Response> delegate; | ||
|
||
private RetryingCallback(Supplier<ListenableFuture<T>> runnable, SettableFuture<T> delegate) { | ||
private RetryingCallback( | ||
Function<Integer, ListenableFuture<Response>> runnable, SettableFuture<Response> delegate) { | ||
this.runnable = runnable; | ||
this.delegate = delegate; | ||
} | ||
|
||
@Override | ||
public void onSuccess(T result) { | ||
delegate.set(result); | ||
public void onSuccess(Response response) { | ||
// this condition should really match the BlacklistingChannel so that we don't hit the same host twice in | ||
// a row | ||
if (response.code() == 503 || response.code() == 500) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thinking a bit more carefully here, I think we should possibly just match c-j-r's old behaviour to minimize disruption on the rollout:
We can debate the 'retry 500s' thing separately. |
||
closeBody(response); | ||
retryOrFail(Optional.empty()); | ||
return; | ||
iamdanfox marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
boolean setSuccessfully = delegate.set(response); | ||
if (!setSuccessfully) { | ||
closeBody(response); | ||
} | ||
} | ||
|
||
@Override | ||
public void onFailure(Throwable throwable) { | ||
if (failures.incrementAndGet() < maxRetries) { | ||
Futures.addCallback(runnable.get(), this, DIRECT_EXECUTOR); | ||
retryOrFail(Optional.of(throwable)); | ||
} | ||
|
||
private void retryOrFail(Optional<Throwable> throwable) { | ||
int attempt = failures.incrementAndGet(); | ||
if (attempt < maxRetries) { | ||
Futures.addCallback(runnable.apply(attempt), this, DIRECT_EXECUTOR); | ||
} else { | ||
delegate.setException(throwable); | ||
if (throwable.isPresent()) { | ||
delegate.setException(throwable.get()); | ||
} else { | ||
delegate.setException(new SafeRuntimeException("Retries exhausted")); | ||
} | ||
} | ||
} | ||
|
||
private void closeBody(Response response) { | ||
if (response == null || response.body() == null) { | ||
return; | ||
} | ||
try { | ||
response.body().close(); | ||
} catch (IOException e) { | ||
delegate.setException(new SafeRuntimeException("Failed to close response body", e)); | ||
} | ||
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4 changes: 2 additions & 2 deletions
4
simulation/src/test/resources/all_nodes_500[CONCURRENCY_LIMITER].png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion
2
simulation/src/test/resources/all_nodes_500[CONCURRENCY_LIMITER].txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
success=50.0% client_mean=PT0.6S server_cpu=PT2M received=200/200 codes={200=100, 500=100} | ||
success=59.0% client_mean=PT1.446S server_cpu=PT4M49.2S received=200/200 codes={200=118, Retries exhausted=82} |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
success=50.0% client_mean=PT0.6S server_cpu=PT2M received=200/200 codes={200=100, 500=100} | ||
success=59.0% client_mean=PT1.446S server_cpu=PT4M49.2S received=200/200 codes={200=118, Retries exhausted=82} |
4 changes: 2 additions & 2 deletions
4
simulation/src/test/resources/fast_500s_then_revert[CONCURRENCY_LIMITER].png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion
2
simulation/src/test/resources/fast_500s_then_revert[CONCURRENCY_LIMITER].txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
success=76.7% client_mean=PT0.055281733S server_cpu=PT3M27.306499709S received=3750/3750 codes={200=2875, 500=875} | ||
success=100.0% client_mean=PT0.078971555S server_cpu=PT4M56.143333328S received=3750/3750 codes={200=3750} |
4 changes: 2 additions & 2 deletions
4
simulation/src/test/resources/fast_500s_then_revert[ROUND_ROBIN].png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion
2
simulation/src/test/resources/fast_500s_then_revert[ROUND_ROBIN].txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
success=76.7% client_mean=PT0.055281733S server_cpu=PT3M27.306499709S received=3750/3750 codes={200=2875, 500=875} | ||
success=100.0% client_mean=PT0.078971555S server_cpu=PT4M56.143333328S received=3750/3750 codes={200=3750} |
4 changes: 2 additions & 2 deletions
4
simulation/src/test/resources/live_reloading[CONCURRENCY_LIMITER].png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion
2
simulation/src/test/resources/live_reloading[CONCURRENCY_LIMITER].txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
success=58.3% client_mean=PT0.7228S server_cpu=PT4M49.12S received=400/400 codes={200=233, 500=167} | ||
success=95.0% client_mean=PT1.5825125S server_cpu=PT9M1.09S received=400/400 codes={200=380, Retries exhausted=20} |
4 changes: 2 additions & 2 deletions
4
simulation/src/test/resources/live_reloading[ROUND_ROBIN].png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
success=58.3% client_mean=PT0.7228S server_cpu=PT4M49.12S received=400/400 codes={200=233, 500=167} | ||
success=95.0% client_mean=PT1.373525S server_cpu=PT9M9.41S received=400/400 codes={200=380, Retries exhausted=20} |
4 changes: 2 additions & 2 deletions
4
simulation/src/test/resources/one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER].png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion
2
simulation/src/test/resources/one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER].txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
success=67.6% client_mean=PT0.6S server_cpu=PT5M6S received=510/510 codes={200=345, 500=165} | ||
success=96.7% client_mean=PT0.945882352S server_cpu=PT8M2.4S received=510/510 codes={200=493, Retries exhausted=17} |
4 changes: 2 additions & 2 deletions
4
simulation/src/test/resources/one_endpoint_dies_on_each_server[ROUND_ROBIN].png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion
2
simulation/src/test/resources/one_endpoint_dies_on_each_server[ROUND_ROBIN].txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
success=67.6% client_mean=PT0.6S server_cpu=PT5M6S received=510/510 codes={200=345, 500=165} | ||
success=96.7% client_mean=PT0.945882352S server_cpu=PT8M2.4S received=510/510 codes={200=493, Retries exhausted=17} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If it's OK with you I think I'd actually prefer to just stick with the vanilla guava - I find it kinda reassuring that there's no magic going on under the hood
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It just seems like extra boilerplate 🤷‍♂️ — it feels off to have a utility exactly for this and then not use it.