From 01fc9d89d43435835d6c58aa86571894706d30db Mon Sep 17 00:00:00 2001 From: Dan Fox Date: Thu, 9 Apr 2020 00:27:09 +0100 Subject: [PATCH 1/6] RetryingChannel reduces stacktrace creation --- .../dialogue/core/RetryingChannel.java | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/dialogue-core/src/main/java/com/palantir/dialogue/core/RetryingChannel.java b/dialogue-core/src/main/java/com/palantir/dialogue/core/RetryingChannel.java index d8d65a974..e4ac96c94 100644 --- a/dialogue-core/src/main/java/com/palantir/dialogue/core/RetryingChannel.java +++ b/dialogue-core/src/main/java/com/palantir/dialogue/core/RetryingChannel.java @@ -47,12 +47,11 @@ import java.util.concurrent.TimeUnit; import java.util.function.DoubleSupplier; import java.util.function.Supplier; +import javax.annotation.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -/** - * Immediately retries calls to the underlying channel upon failure. - */ +/** Retries failed requests by scheduling them onto a ScheduledExecutorService after an exponential backoff. */ final class RetryingChannel implements Channel { private static final Logger log = LoggerFactory.getLogger(RetryingChannel.class); @@ -156,9 +155,9 @@ ListenableFuture execute() { } @SuppressWarnings("FutureReturnValueIgnored") // error-prone bug - ListenableFuture retry(Throwable cause) { + ListenableFuture scheduleRetry(@Nullable Throwable throwableToLog) { long backoffNanoseconds = getBackoffNanoseconds(); - logRetry(backoffNanoseconds, cause); + logRetry(backoffNanoseconds, throwableToLog); if (backoffNanoseconds <= 0) { return wrap(delegate.execute(endpoint, request)); } @@ -181,15 +180,15 @@ private long getBackoffNanoseconds() { return Math.round(backoffSlotSize.toNanos() * jitter.getAsDouble() * upperBound); } - ListenableFuture success(Response response) { - // this condition should really match the BlacklistingChannel so that we don't hit the same host twice in - // a row + ListenableFuture handleHttpResponse(Response response) { if (Responses.isQosStatus(response)) { response.close(); - Throwable failure = - new SafeRuntimeException("Received retryable response", SafeArg.of("status", response.code())); if (++failures <= maxRetries) { - return retry(failure); + Throwable throwableToLog = log.isInfoEnabled() + ? new SafeRuntimeException( + "Received retryable response", SafeArg.of("status", response.code())) + : null; + return scheduleRetry(throwableToLog); } if (log.isDebugEnabled()) { log.debug( @@ -205,10 +204,10 @@ ListenableFuture success(Response response) { return Futures.immediateFuture(response); } - ListenableFuture failure(Throwable throwable) { + ListenableFuture handleThrowable(Throwable throwable) { if (++failures <= maxRetries) { if (shouldAttemptToRetry(throwable)) { - return retry(throwable); + return scheduleRetry(throwable); } else if (log.isDebugEnabled()) { log.debug( "Not attempting to retry failure", @@ -234,7 +233,7 @@ private boolean shouldAttemptToRetry(Throwable throwable) { return true; } - private void logRetry(long backoffNanoseconds, Throwable throwable) { + private void logRetry(long backoffNanoseconds, @Nullable Throwable throwable) { if (log.isInfoEnabled()) { log.info( "Retrying call after failure", @@ -251,9 +250,9 @@ private void logRetry(long backoffNanoseconds, Throwable throwable) { private ListenableFuture wrap(ListenableFuture input) { ListenableFuture result = input; if (!shouldPropagateQos(serverQoS)) { - result = Futures.transformAsync(result, this::success, MoreExecutors.directExecutor()); + result = Futures.transformAsync(result, this::handleHttpResponse, MoreExecutors.directExecutor()); } - result = Futures.catchingAsync(result, Throwable.class, this::failure, MoreExecutors.directExecutor()); + result = Futures.catchingAsync(result, Throwable.class, this::handleThrowable, MoreExecutors.directExecutor()); return result; } } From 4fb7bc8fc40bf5947c22ee4a1460073027d007e4 Mon Sep 17 00:00:00 2001 From: Dan Fox Date: Thu, 9 Apr 2020 00:36:26 +0100 Subject: [PATCH 2/6] Test showing our error parsing is broken --- .../verification/IntegrationTest.java | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/dialogue-client-verifier/src/test/java/com/palantir/verification/IntegrationTest.java b/dialogue-client-verifier/src/test/java/com/palantir/verification/IntegrationTest.java index 3a2dfd646..06a5954ff 100644 --- a/dialogue-client-verifier/src/test/java/com/palantir/verification/IntegrationTest.java +++ b/dialogue-client-verifier/src/test/java/com/palantir/verification/IntegrationTest.java @@ -17,6 +17,7 @@ package com.palantir.verification; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import com.google.common.base.Stopwatch; import com.google.common.collect.Iterables; @@ -27,6 +28,7 @@ import com.palantir.conjure.java.api.config.service.ServiceConfiguration; import com.palantir.conjure.java.api.config.service.UserAgent; import com.palantir.conjure.java.api.config.ssl.SslConfiguration; +import com.palantir.conjure.java.api.errors.RemoteException; import com.palantir.conjure.java.client.config.ClientConfiguration; import com.palantir.conjure.java.client.config.ClientConfigurations; import com.palantir.conjure.java.dialogue.serde.DefaultConjureRuntime; @@ -50,6 +52,7 @@ import java.util.Arrays; import java.util.Optional; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.zip.GZIPOutputStream; import org.junit.After; import org.junit.Before; @@ -112,6 +115,27 @@ public void conjure_generated_blocking_interface_with_optional_binary_return_typ assertThat(maybeBinary.get()).hasSameContentAs(asInputStream("Hello, world")); } + @Test + public void deserializes_a_conjure_error_after_exhausting_retries() { + AtomicInteger calls = new AtomicInteger(0); + undertowHandler = exchange -> { + exchange.setStatusCode(429); + exchange.getResponseHeaders().put(Headers.CONTENT_TYPE, "application/json"); + exchange.getResponseSender() + .send("{" + + "\"errorCode\":\"FAILED_PRECONDITION\"," + + "\"errorName\":\"Default:FailedPrecondition\"," + + "\"errorInstanceId\":\"43580df1-e019-473b-bb3d-be6d489f36e5\"," + + "\"parameters\":{\"numCalls\":\"" + calls.getAndIncrement() + "\"}" + + "}\n"); + }; + + assertThatThrownBy(sampleServiceBlocking()::voidToVoid) + .isInstanceOf(RemoteException.class); + + assertThat(calls).describedAs("one initial call + 4 retries").hasValue(5); + } + @Test public void stream_3_gigabytes() throws IOException { long oneMegabyte = 1000_000; From 194846a2e446730665fbc700034a1b9132e7b2c3 Mon Sep 17 00:00:00 2001 From: Dan Fox Date: Thu, 9 Apr 2020 00:42:15 +0100 Subject: [PATCH 3/6] unit test final_exhausted_failure_response_body_is_not_closed --- .../dialogue/core/RetryingChannel.java | 3 +- .../dialogue/core/RetryingChannelTest.java | 29 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/dialogue-core/src/main/java/com/palantir/dialogue/core/RetryingChannel.java b/dialogue-core/src/main/java/com/palantir/dialogue/core/RetryingChannel.java index e4ac96c94..f8c53c164 100644 --- a/dialogue-core/src/main/java/com/palantir/dialogue/core/RetryingChannel.java +++ b/dialogue-core/src/main/java/com/palantir/dialogue/core/RetryingChannel.java @@ -182,8 +182,8 @@ private long getBackoffNanoseconds() { ListenableFuture handleHttpResponse(Response response) { if (Responses.isQosStatus(response)) { - response.close(); if (++failures <= maxRetries) { + response.close(); // nobody is going to read this response body Throwable throwableToLog = log.isInfoEnabled() ? new SafeRuntimeException( "Received retryable response", SafeArg.of("status", response.code())) @@ -196,6 +196,7 @@ ListenableFuture handleHttpResponse(Response response) { SafeArg.of("retries", maxRetries), SafeArg.of("status", response.code())); } + // not closing the final response body because ConjureBodySerde needs to read it to deserialize return Futures.immediateFuture(response); } diff --git a/dialogue-core/src/test/java/com/palantir/dialogue/core/RetryingChannelTest.java b/dialogue-core/src/test/java/com/palantir/dialogue/core/RetryingChannelTest.java index be3ce9eef..ffb43fc35 100644 --- a/dialogue-core/src/test/java/com/palantir/dialogue/core/RetryingChannelTest.java +++ b/dialogue-core/src/test/java/com/palantir/dialogue/core/RetryingChannelTest.java @@ -229,6 +229,35 @@ public void response_bodies_are_closed() throws Exception { verify(response2, times(1)).close(); } + @Test + public void final_exhausted_failure_response_body_is_not_closed() throws Exception { + TestResponse response1 = new TestResponse().code(503); + TestResponse response2 = new TestResponse().code(503); + TestResponse response3 = new TestResponse().code(503); + + when(channel.execute(any(), any())) + .thenReturn(Futures.immediateFuture(response1)) + .thenReturn(Futures.immediateFuture(response2)) + .thenReturn(Futures.immediateFuture(response3)); + + Channel retryer = new RetryingChannel( + channel, + "my-channel", + 2, + Duration.ZERO, + ClientConfiguration.ServerQoS.AUTOMATIC_RETRY, + ClientConfiguration.RetryOnTimeout.DISABLED); + ListenableFuture response = retryer.execute(TestEndpoint.POST, REQUEST); + assertThat(response.get(1, TimeUnit.SECONDS).code()).isEqualTo(503); + + assertThat(response1.isClosed()).isTrue(); + assertThat(response2.isClosed()).isTrue(); + assertThat(response3.isClosed()) + .describedAs("The last response must be left open so we can read the body" + + " and deserialize it into a structured error") + .isFalse(); + } + @Test public void testPropagatesCancel() { ListenableFuture delegateResult = SettableFuture.create(); From 747d8615ce39c225541e9ab24c4b3296480ed7a2 Mon Sep 17 00:00:00 2001 From: Dan Fox Date: Thu, 9 Apr 2020 00:44:05 +0100 Subject: [PATCH 4/6] one more static --- .../java/com/palantir/dialogue/core/RetryingChannel.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dialogue-core/src/main/java/com/palantir/dialogue/core/RetryingChannel.java b/dialogue-core/src/main/java/com/palantir/dialogue/core/RetryingChannel.java index f8c53c164..686ef734b 100644 --- a/dialogue-core/src/main/java/com/palantir/dialogue/core/RetryingChannel.java +++ b/dialogue-core/src/main/java/com/palantir/dialogue/core/RetryingChannel.java @@ -126,7 +126,7 @@ public ListenableFuture execute(Endpoint endpoint, Request request) { return delegate.execute(endpoint, request); } - private boolean isRetryable(Request request) { + private static boolean isRetryable(Request request) { Optional maybeBody = request.body(); return !maybeBody.isPresent() || maybeBody.get().repeatable(); } @@ -253,7 +253,8 @@ private ListenableFuture wrap(ListenableFuture input) { if (!shouldPropagateQos(serverQoS)) { result = Futures.transformAsync(result, this::handleHttpResponse, MoreExecutors.directExecutor()); } - result = Futures.catchingAsync(result, Throwable.class, this::handleThrowable, MoreExecutors.directExecutor()); + result = Futures.catchingAsync( + result, Throwable.class, this::handleThrowable, MoreExecutors.directExecutor()); return result; } } From 14fd0b3af7404795192630f829f9ac4f3ff551f6 Mon Sep 17 00:00:00 2001 From: Dan Fox Date: Wed, 8 Apr 2020 23:44:05 +0000 Subject: [PATCH 5/6] Add generated changelog entries --- changelog/@unreleased/pr-630.v2.yml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 changelog/@unreleased/pr-630.v2.yml diff --git a/changelog/@unreleased/pr-630.v2.yml b/changelog/@unreleased/pr-630.v2.yml new file mode 100644 index 000000000..cde8d77cf --- /dev/null +++ b/changelog/@unreleased/pr-630.v2.yml @@ -0,0 +1,6 @@ +type: fix +fix: + description: Structured errors are now correctly deserialized when requests exhaust + all available retries. (Previously they would always appear as an `UnknownRemoteException`.) + links: + - https://github.com/palantir/dialogue/pull/630 From 0e4758291c40cde98cad0e715a3693bf0865c083 Mon Sep 17 00:00:00 2001 From: Dan Fox Date: Thu, 9 Apr 2020 00:52:04 +0100 Subject: [PATCH 6/6] assert harder --- .../java/com/palantir/verification/IntegrationTest.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dialogue-client-verifier/src/test/java/com/palantir/verification/IntegrationTest.java b/dialogue-client-verifier/src/test/java/com/palantir/verification/IntegrationTest.java index 06a5954ff..7428f50ab 100644 --- a/dialogue-client-verifier/src/test/java/com/palantir/verification/IntegrationTest.java +++ b/dialogue-client-verifier/src/test/java/com/palantir/verification/IntegrationTest.java @@ -131,7 +131,11 @@ public void deserializes_a_conjure_error_after_exhausting_retries() { }; assertThatThrownBy(sampleServiceBlocking()::voidToVoid) - .isInstanceOf(RemoteException.class); + .isInstanceOf(RemoteException.class) + .satisfies(throwable -> { + assertThat(((RemoteException) throwable).getError().parameters()) + .containsEntry("numCalls", "4"); + }); assertThat(calls).describedAs("one initial call + 4 retries").hasValue(5); }