From 56aeb04a064218b845ecc193d530c341c6ec854d Mon Sep 17 00:00:00 2001 From: olaola Date: Thu, 8 Feb 2018 07:18:53 -0800 Subject: [PATCH] Fixing #4585: broken re-execution of orphaned actions. This is an important regression, we will want to patch the fix into 0.10 TESTED=fixed unit test, with A/B testing RELNOTES: Resolved an issue where a failure in the remote cache would not trigger local re-execution of an action. PiperOrigin-RevId: 184991670 --- .../build/lib/remote/GrpcRemoteCache.java | 60 ++++++------- .../build/lib/remote/RemoteSpawnRunner.java | 3 +- .../remote/GrpcRemoteExecutionClientTest.java | 87 ++++++++++++++++--- 3 files changed, 109 insertions(+), 41 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/remote/GrpcRemoteCache.java b/src/main/java/com/google/devtools/build/lib/remote/GrpcRemoteCache.java index d865d0c40b5d41..f0bc22b7b810d6 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/GrpcRemoteCache.java +++ b/src/main/java/com/google/devtools/build/lib/remote/GrpcRemoteCache.java @@ -178,11 +178,6 @@ public void ensureInputsPresent( * allow throwing such an exception. Any caller must make sure to catch the * {@link StatusRuntimeException}. Note that the retrier implicitly catches it, so if this is used * in the context of {@link RemoteRetrier#execute}, that's perfectly safe. - * - *

This method also converts any NOT_FOUND code returned from the server into a - * {@link CacheNotFoundException}. TODO(olaola): this is not enough. NOT_FOUND can also be raised - * by execute, in which case the server should return the missing digest in the Status.details - * field. This should be part of the API. */ private void readBlob(Digest digest, OutputStream stream) throws IOException, StatusRuntimeException { @@ -191,29 +186,29 @@ private void readBlob(Digest digest, OutputStream stream) resourceName += options.remoteInstanceName + "/"; } resourceName += "blobs/" + digest.getHash() + "/" + digest.getSizeBytes(); - try { - Iterator replies = bsBlockingStub() - .read(ReadRequest.newBuilder().setResourceName(resourceName).build()); - while (replies.hasNext()) { - replies.next().getData().writeTo(stream); - } - } catch (StatusRuntimeException e) { - if (e.getStatus().getCode() == Status.Code.NOT_FOUND) { - throw new CacheNotFoundException(digest); - } - throw e; + Iterator replies = bsBlockingStub() + .read(ReadRequest.newBuilder().setResourceName(resourceName).build()); + while (replies.hasNext()) { + replies.next().getData().writeTo(stream); } } @Override protected void downloadBlob(Digest digest, Path dest) throws IOException, InterruptedException { - retrier.execute( - () -> { - try (OutputStream stream = dest.getOutputStream()) { - readBlob(digest, stream); - } - return null; - }); + try { + retrier.execute( + () -> { + try (OutputStream stream = dest.getOutputStream()) { + readBlob(digest, stream); + } + return null; + }); + } catch (RetryException e) { + if (RemoteRetrierUtils.causedByStatus(e, Status.Code.NOT_FOUND)) { + throw new CacheNotFoundException(digest); + } + throw e; + } } @Override @@ -221,12 +216,19 @@ protected byte[] downloadBlob(Digest digest) throws IOException, InterruptedExce if (digest.getSizeBytes() == 0) { return new byte[0]; } - return retrier.execute( - () -> { - ByteArrayOutputStream stream = new ByteArrayOutputStream((int) digest.getSizeBytes()); - readBlob(digest, stream); - return stream.toByteArray(); - }); + try { + return retrier.execute( + () -> { + ByteArrayOutputStream stream = new ByteArrayOutputStream((int) digest.getSizeBytes()); + readBlob(digest, stream); + return stream.toByteArray(); + }); + } catch (RetryException e) { + if (RemoteRetrierUtils.causedByStatus(e, Status.Code.NOT_FOUND)) { + throw new CacheNotFoundException(digest); + } + throw e; + } } @Override diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java index e0ba1fb4f406c7..f8ad0879025f92 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java @@ -245,7 +245,8 @@ private SpawnResult handleError(IOException exception, FileOutErr outErr) throws if (exception instanceof RetryException && RemoteRetrierUtils.causedByStatus((RetryException) exception, Code.UNAVAILABLE)) { status = Status.EXECUTION_FAILED_CATASTROPHICALLY; - } else if (cause instanceof CacheNotFoundException) { + } else if (exception instanceof CacheNotFoundException + || cause instanceof CacheNotFoundException) { status = Status.REMOTE_CACHE_FAILED; } else { status = Status.EXECUTION_FAILED; diff --git a/src/test/java/com/google/devtools/build/lib/remote/GrpcRemoteExecutionClientTest.java b/src/test/java/com/google/devtools/build/lib/remote/GrpcRemoteExecutionClientTest.java index c278581c2cbe8d..7167f2f48a8615 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/GrpcRemoteExecutionClientTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/GrpcRemoteExecutionClientTest.java @@ -259,6 +259,7 @@ public PathFragment getExecPath() { @After public void tearDown() throws Exception { fakeServer.shutdownNow(); + fakeServer.awaitTermination(); } @Test @@ -791,6 +792,66 @@ public void read(ReadRequest request, StreamObserver responseObser } } + @Test + public void passRepeatedOrphanedCacheMissErrorWithStackTrace() throws Exception { + final Digest stdOutDigest = DIGEST_UTIL.computeAsUtf8("bloo"); + final ActionResult actionResult = + ActionResult.newBuilder().setStdoutDigest(stdOutDigest).build(); + serviceRegistry.addService( + new ActionCacheImplBase() { + @Override + public void getActionResult( + GetActionResultRequest request, StreamObserver responseObserver) { + responseObserver.onNext(actionResult); + responseObserver.onCompleted(); + } + }); + serviceRegistry.addService( + new ExecutionImplBase() { + @Override + public void execute(ExecuteRequest request, StreamObserver responseObserver) { + responseObserver.onNext( + Operation.newBuilder() + .setDone(true) + .setResponse( + Any.pack(ExecuteResponse.newBuilder().setResult(actionResult).build())) + .build()); + responseObserver.onCompleted(); + } + }); + serviceRegistry.addService( + new ContentAddressableStorageImplBase() { + @Override + public void findMissingBlobs( + FindMissingBlobsRequest request, + StreamObserver responseObserver) { + responseObserver.onNext(FindMissingBlobsResponse.getDefaultInstance()); + responseObserver.onCompleted(); + } + }); + serviceRegistry.addService( + new ByteStreamImplBase() { + @Override + public void read(ReadRequest request, StreamObserver responseObserver) { + assertThat(request.getResourceName().contains(stdOutDigest.getHash())).isTrue(); + responseObserver.onError(Status.NOT_FOUND.asRuntimeException()); + } + }); + + try { + client.exec(simpleSpawn, simplePolicy); + fail("Expected an exception"); + } catch (SpawnExecException expected) { + assertThat(expected.getSpawnResult().status()) + .isEqualTo(SpawnResult.Status.REMOTE_CACHE_FAILED); + assertThat(expected).hasMessageThat().contains(stdOutDigest.getHash()); + // Ensure we also got back the stack trace. + assertThat(expected) + .hasMessageThat() + .contains("passRepeatedOrphanedCacheMissErrorWithStackTrace"); + } + } + @Test public void remotelyReExecuteOrphanedCachedActions() throws Exception { final Digest stdOutDigest = DIGEST_UTIL.computeAsUtf8("stdout"); @@ -807,10 +868,19 @@ public void getActionResult( }); serviceRegistry.addService( new ByteStreamImplBase() { + private boolean first = true; + @Override public void read(ReadRequest request, StreamObserver responseObserver) { - // All reads are a cache miss. - responseObserver.onError(Status.NOT_FOUND.asRuntimeException()); + // First read is a cache miss, next read succeeds. + if (first) { + first = false; + responseObserver.onError(Status.NOT_FOUND.asRuntimeException()); + } else { + responseObserver.onNext( + ReadResponse.newBuilder().setData(ByteString.copyFromUtf8("stdout")).build()); + responseObserver.onCompleted(); + } } @Override @@ -858,14 +928,9 @@ public void findMissingBlobs( } }); - try { - client.exec(simpleSpawn, simplePolicy); - fail("Expected an exception"); - } catch (ExecException expected) { - assertThat(expected).hasMessageThat().contains("Missing digest"); - assertThat(expected) - .hasMessageThat() - .contains(DIGEST_UTIL.computeAsUtf8("stdout").toString()); - } + SpawnResult result = client.exec(simpleSpawn, simplePolicy); + assertThat(result.setupSuccess()).isTrue(); + assertThat(result.exitCode()).isEqualTo(0); + assertThat(outErr.outAsLatin1()).isEqualTo("stdout"); } }