diff --git a/src/main/java/com/google/devtools/build/lib/remote/BulkTransferException.java b/src/main/java/com/google/devtools/build/lib/remote/BulkTransferException.java new file mode 100644 index 00000000000000..6bc88dc8c9c0a4 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/remote/BulkTransferException.java @@ -0,0 +1,53 @@ +// Copyright 2020 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.remote; + +import com.google.common.base.Preconditions; +import com.google.devtools.build.lib.remote.common.CacheNotFoundException; +import java.io.IOException; + +/** + * Exception which represents a collection of IOExceptions for the purpose + * of distinguishing remote communication exceptions from those which occur + * on filesystems locally. This exception serves as a trace point for the actual + * transfer, so that the intended operation can be observed in a stack, with all + * constituent exceptions available for observation. + */ +class BulkTransferException extends IOException { + // true since no empty BulkTransferException is ever thrown + private boolean allCacheNotFoundException = true; + + BulkTransferException() { + } + + BulkTransferException(IOException e) { + add(e); + } + + /** + * Add an IOException to the suppressed list. + * + * The Java standard addSuppressed is final and this method stands in + * its place to selectively filter and record whether all suppressed + * exceptions are CacheNotFoundExceptions + */ + void add(IOException e) { + allCacheNotFoundException &= e instanceof CacheNotFoundException; + super.addSuppressed(e); + } + + boolean onlyCausedByCacheNotFoundException() { + return allCacheNotFoundException; + } +} diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteCache.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteCache.java index 4df91008164686..1d37272c24e5d6 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteCache.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteCache.java @@ -14,6 +14,7 @@ package com.google.devtools.build.lib.remote; import static com.google.common.util.concurrent.MoreExecutors.directExecutor; +import static com.google.devtools.build.lib.remote.util.Utils.getFromFuture; import build.bazel.remote.execution.v2.Action; import build.bazel.remote.execution.v2.ActionResult; @@ -34,6 +35,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.util.concurrent.FutureCallback; import com.google.common.util.concurrent.Futures; @@ -57,7 +59,6 @@ import com.google.devtools.build.lib.remote.common.RemoteCacheClient.ActionKey; import com.google.devtools.build.lib.remote.options.RemoteOptions; import com.google.devtools.build.lib.remote.util.DigestUtil; -import com.google.devtools.build.lib.remote.util.Utils; import com.google.devtools.build.lib.remote.util.Utils.InMemoryOutput; import com.google.devtools.build.lib.util.io.FileOutErr; import com.google.devtools.build.lib.util.io.OutErr; @@ -116,7 +117,7 @@ public RemoteCache( public ActionResult downloadActionResult(ActionKey actionKey) throws IOException, InterruptedException { - return Utils.getFromFuture(cacheProtocol.downloadActionResult(actionKey)); + return getFromFuture(cacheProtocol.downloadActionResult(actionKey)); } /** @@ -182,7 +183,7 @@ private void uploadOutputs( digests.addAll(digestToBlobs.keySet()); ImmutableSet digestsToUpload = - Utils.getFromFuture(cacheProtocol.findMissingDigests(digests)); + getFromFuture(cacheProtocol.findMissingDigests(digests)); ImmutableList.Builder> uploads = ImmutableList.builder(); for (Digest digest : digestsToUpload) { Path file = digestToFile.get(digest); @@ -198,7 +199,7 @@ private void uploadOutputs( } } - waitForUploads(uploads.build()); + waitForBulkTransfer(uploads.build(), /* cancelRemainingOnInterrupt=*/ false); if (manifest.getStderrDigest() != null) { result.setStderrDigest(manifest.getStderrDigest()); @@ -208,22 +209,44 @@ private void uploadOutputs( } } - private static void waitForUploads(List> uploads) - throws IOException, InterruptedException { - try { - for (ListenableFuture upload : uploads) { - upload.get(); + protected static void waitForBulkTransfer(Iterable> transfers, boolean cancelRemainingOnInterrupt) + throws BulkTransferException, InterruptedException { + BulkTransferException bulkTransferException = null; + InterruptedException interruptedException = null; + boolean interrupted = Thread.currentThread().isInterrupted(); + for (ListenableFuture transfer : transfers) { + try { + if (interruptedException == null) { + // Wait for all downloads to finish. + getFromFuture(transfer); + } else { + transfer.cancel(true); + } + } catch (IOException e) { + if (bulkTransferException == null) { + bulkTransferException = new BulkTransferException(); + } + bulkTransferException.add(e); + } catch (InterruptedException e) { + interrupted = Thread.interrupted() || interrupted; + interruptedException = e; + if (!cancelRemainingOnInterrupt) { + // leave the rest of the transfers alone + break; + } } - } catch (ExecutionException e) { - // TODO(buchgr): Add support for cancellation and factor this method out to be shared - // between ByteStreamUploader as well. - Throwable cause = e.getCause(); - Throwables.throwIfInstanceOf(cause, IOException.class); - Throwables.throwIfInstanceOf(cause, InterruptedException.class); - if (cause != null) { - throw new IOException(cause); + } + if (interrupted) { + Thread.currentThread().interrupt(); + } + if (interruptedException != null) { + if (bulkTransferException != null) { + interruptedException.addSuppressed(bulkTransferException); } - throw new IOException(e); + throw interruptedException; + } + if (bulkTransferException != null) { + throw bulkTransferException; } } @@ -299,40 +322,16 @@ public void download( // Subsequently we need to wait for *every* download to finish, even if we already know that // one failed. That's so that when exiting this method we can be sure that all downloads have // finished and don't race with the cleanup routine. - // TODO(buchgr): Look into cancellation. - IOException downloadException = null; - InterruptedException interruptedException = null; FileOutErr tmpOutErr = null; - try { - if (origOutErr != null) { - tmpOutErr = origOutErr.childOutErr(); - } - downloads.addAll(downloadOutErr(result, tmpOutErr)); - } catch (IOException e) { - downloadException = e; - } - - for (ListenableFuture download : downloads) { - try { - // Wait for all downloads to finish. - getFromFuture(download); - } catch (IOException e) { - if (downloadException == null) { - downloadException = e; - } else if (e != downloadException) { - downloadException.addSuppressed(e); - } - } catch (InterruptedException e) { - if (interruptedException == null) { - interruptedException = e; - } else if (e != interruptedException) { - interruptedException.addSuppressed(e); - } - } + if (origOutErr != null) { + tmpOutErr = origOutErr.childOutErr(); } + downloads.addAll(downloadOutErr(result, tmpOutErr)); - if (downloadException != null || interruptedException != null) { + try { + waitForBulkTransfer(downloads, /* cancelRemainingOnInterrupt=*/ true); + } catch (Exception e) { try { // Delete any (partially) downloaded output files. for (OutputFile file : result.getOutputFilesList()) { @@ -347,27 +346,17 @@ public void download( tmpOutErr.clearOut(); tmpOutErr.clearErr(); } - } catch (IOException e) { - if (downloadException != null && e != downloadException) { - e.addSuppressed(downloadException); - } - if (interruptedException != null) { - e.addSuppressed(interruptedException); - } + } catch (IOException ioEx) { + ioEx.addSuppressed(e); // If deleting of output files failed, we abort the build with a decent error message as // any subsequent local execution failure would likely be incomprehensible. - throw new EnvironmentalExecException( - "Failed to delete output files after incomplete download", e); + ExecException execEx = new EnvironmentalExecException( + "Failed to delete output files after incomplete download", ioEx); + execEx.addSuppressed(e); + throw execEx; } - } - - if (interruptedException != null) { - throw interruptedException; - } - - if (downloadException != null) { - throw downloadException; + throw e; } if (tmpOutErr != null) { @@ -487,12 +476,15 @@ public void onFailure(Throwable t) { return outerF; } - private List> downloadOutErr(ActionResult result, OutErr outErr) - throws IOException { + private List> downloadOutErr(ActionResult result, OutErr outErr) { List> downloads = new ArrayList<>(); if (!result.getStdoutRaw().isEmpty()) { - result.getStdoutRaw().writeTo(outErr.getOutputStream()); - outErr.getOutputStream().flush(); + try { + result.getStdoutRaw().writeTo(outErr.getOutputStream()); + outErr.getOutputStream().flush(); + } catch (IOException e) { + downloads.add(Futures.immediateFailedFuture(e)); + } } else if (result.hasStdoutDigest()) { downloads.add( Futures.transform( @@ -501,8 +493,12 @@ private List> downloadOutErr(ActionResult result, directExecutor())); } if (!result.getStderrRaw().isEmpty()) { - result.getStderrRaw().writeTo(outErr.getErrorStream()); - outErr.getErrorStream().flush(); + try { + result.getStderrRaw().writeTo(outErr.getErrorStream()); + outErr.getErrorStream().flush(); + } catch (IOException e) { + downloads.add(Futures.immediateFailedFuture(e)); + } } else if (result.hasStderrDigest()) { downloads.add( Futures.transform( @@ -1115,9 +1111,4 @@ public Collection symlinks() { return symlinks.values(); } } - - @VisibleForTesting - protected T getFromFuture(ListenableFuture f) throws IOException, InterruptedException { - return Utils.getFromFuture(f); - } } diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionCache.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionCache.java index 4d9033ba8c1b63..56b3e40354cb39 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionCache.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionCache.java @@ -13,6 +13,7 @@ // limitations under the License. package com.google.devtools.build.lib.remote; +import static com.google.devtools.build.lib.remote.util.Utils.getFromFuture; import static java.lang.String.format; import build.bazel.remote.execution.v2.Digest; @@ -57,21 +58,7 @@ private void uploadMissing(Map files, Map blob uploads.add(cacheProtocol.uploadBlob(entry.getKey(), entry.getValue())); } - try { - for (ListenableFuture upload : uploads) { - upload.get(); - } - } catch (ExecutionException e) { - // Cancel remaining uploads. - for (ListenableFuture upload : uploads) { - upload.cancel(/* mayInterruptIfRunning= */ true); - } - - Throwable cause = e.getCause(); - Throwables.propagateIfPossible(cause, IOException.class); - Throwables.propagateIfPossible(cause, InterruptedException.class); - throw new IOException(cause); - } + waitForBulkTransfer(uploads, /* cancelRemainingOnInterrupt=*/ false); } /** @@ -91,7 +78,7 @@ public void ensureInputsPresent(MerkleTree merkleTree, Map addi Iterable allDigests = Iterables.concat(merkleTree.getAllDigests(), additionalInputs.keySet()); ImmutableSet missingDigests = - Utils.getFromFuture(cacheProtocol.findMissingDigests(allDigests)); + getFromFuture(cacheProtocol.findMissingDigests(allDigests)); Map filesToUpload = new HashMap<>(); Map blobsToUpload = new HashMap<>(); for (Digest missingDigest : missingDigests) { diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java index e6c4e0e135852c..baf638e0faeabd 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java @@ -103,8 +103,9 @@ public class RemoteSpawnRunner implements SpawnRunner { private static final String VIOLATION_TYPE_MISSING = "MISSING"; private static boolean retriableExecErrors(Exception e) { - if (e instanceof CacheNotFoundException || e.getCause() instanceof CacheNotFoundException) { - return true; + if (e instanceof BulkTransferException) { + BulkTransferException bulkTransferException = (BulkTransferException) e; + return bulkTransferException.onlyCausedByCacheNotFoundException(); } if (!RemoteRetrierUtils.causedByStatus(e, Code.FAILED_PRECONDITION)) { return false; @@ -244,7 +245,10 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionContext context) try { return downloadAndFinalizeSpawnResult( cachedResult, /* cacheHit= */ true, spawn, context, remoteOutputsMode); - } catch (CacheNotFoundException e) { + } catch (BulkTransferException e) { + if (!e.onlyCausedByCacheNotFoundException()) { + throw e; + } // No cache hit, so we fall through to local or remote execution. // We set acceptCachedResult to false in order to force the action re-execution. acceptCachedResult = false; @@ -304,10 +308,12 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionContext context) try { return downloadAndFinalizeSpawnResult( actionResult, reply.getCachedResult(), spawn, context, remoteOutputsMode); - } catch (CacheNotFoundException e) { - // No cache hit, so if we retry this execution, we must no longer accept - // cached results, it must be reexecuted - requestBuilder.setSkipCacheLookup(true); + } catch (BulkTransferException e) { + if (e.onlyCausedByCacheNotFoundException()) { + // No cache hit, so if we retry this execution, we must no longer accept + // cached results, it must be reexecuted + requestBuilder.setSkipCacheLookup(true); + } throw e; } }); @@ -444,14 +450,22 @@ private SpawnResult execLocallyAndUploadOrFail( private SpawnResult handleError( IOException exception, FileOutErr outErr, ActionKey actionKey, SpawnExecutionContext context) throws ExecException, InterruptedException, IOException { - if (exception.getCause() instanceof ExecutionStatusException) { + boolean remoteCacheFailed = false; + if (exception instanceof BulkTransferException) { + BulkTransferException e = (BulkTransferException) exception; + remoteCacheFailed = e.onlyCausedByCacheNotFoundException(); + } if (exception.getCause() instanceof ExecutionStatusException) { ExecutionStatusException e = (ExecutionStatusException) exception.getCause(); if (e.getResponse() != null) { ExecuteResponse resp = e.getResponse(); maybeDownloadServerLogs(resp, actionKey); if (resp.hasResult()) { - // We try to download all (partial) results even on server error, for debuggability. - remoteCache.download(resp.getResult(), execRoot, outErr, context::lockOutputFiles); + try { + // We try to download all (partial) results even on server error, for debuggability. + remoteCache.download(resp.getResult(), execRoot, outErr, context::lockOutputFiles); + } catch (BulkTransferException bulkTransferEx) { + exception.addSuppressed(bulkTransferEx); + } } } if (e.isExecutionTimeout()) { @@ -465,7 +479,7 @@ private SpawnResult handleError( final Status status; if (RemoteRetrierUtils.causedByStatus(exception, Code.UNAVAILABLE)) { status = Status.EXECUTION_FAILED_CATASTROPHICALLY; - } else if (exception instanceof CacheNotFoundException) { + } else if (remoteCacheFailed) { status = Status.REMOTE_CACHE_FAILED; } else { status = Status.EXECUTION_FAILED; diff --git a/src/test/java/com/google/devtools/build/lib/remote/RemoteCacheTests.java b/src/test/java/com/google/devtools/build/lib/remote/RemoteCacheTests.java index 0606c1f0de0de5..cf5776c16b9b34 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/RemoteCacheTests.java +++ b/src/test/java/com/google/devtools/build/lib/remote/RemoteCacheTests.java @@ -52,6 +52,7 @@ import com.google.devtools.build.lib.actions.cache.MetadataInjector; import com.google.devtools.build.lib.actions.util.ActionsTestUtil; import com.google.devtools.build.lib.clock.JavaClock; +import com.google.devtools.build.lib.remote.BulkTransferException; import com.google.devtools.build.lib.remote.RemoteCache.OutputFilesLocker; import com.google.devtools.build.lib.remote.RemoteCache.UploadManifest; import com.google.devtools.build.lib.remote.common.RemoteCacheClient.ActionKey; @@ -643,7 +644,7 @@ public void downloadFailureMaintainsDirectories() throws Exception { OutputFile.newBuilder().setPath("outputdir/outputfile").setDigest(outputFileDigest)); result.addOutputFiles(OutputFile.newBuilder().setPath("otherfile").setDigest(otherFileDigest)); assertThrows( - IOException.class, () -> cache.download(result.build(), execRoot, null, outputFilesLocker)); + BulkTransferException.class, () -> cache.download(result.build(), execRoot, null, outputFilesLocker)); assertThat(cache.getNumFailedDownloads()).isEqualTo(1); assertThat(execRoot.getRelative("outputdir").exists()).isTrue(); assertThat(execRoot.getRelative("outputdir/outputfile").exists()).isFalse(); @@ -672,15 +673,17 @@ public void onErrorWaitForRemainingDownloadsToComplete() throws Exception { .addOutputFiles(OutputFile.newBuilder().setPath("file2").setDigest(digest2)) .addOutputFiles(OutputFile.newBuilder().setPath("file3").setDigest(digest3)) .build(); - IOException e = + BulkTransferException downloadException = assertThrows( - IOException.class, + BulkTransferException.class, () -> cache.download( result, execRoot, new FileOutErr(stdout, stderr), outputFilesLocker)); - assertThat(e.getSuppressed()).isEmpty(); + assertThat(downloadException.getSuppressed()).hasLength(1); assertThat(cache.getNumSuccessfulDownloads()).isEqualTo(2); assertThat(cache.getNumFailedDownloads()).isEqualTo(1); + assertThat(downloadException.getSuppressed()[0]).isInstanceOf(IOException.class); + IOException e = (IOException) downloadException.getSuppressed()[0]; assertThat(Throwables.getRootCause(e)).hasMessageThat().isEqualTo("download failed"); verify(outputFilesLocker, never()).lock(); } @@ -702,17 +705,18 @@ public void downloadWithMultipleErrorsAddsThemAsSuppressed() throws Exception { .addOutputFiles(OutputFile.newBuilder().setPath("file2").setDigest(digest2)) .addOutputFiles(OutputFile.newBuilder().setPath("file3").setDigest(digest3)) .build(); - IOException e = + BulkTransferException e = assertThrows( - IOException.class, + BulkTransferException.class, () -> cache.download( result, execRoot, new FileOutErr(stdout, stderr), outputFilesLocker)); - assertThat(e.getSuppressed()).hasLength(1); + assertThat(e.getSuppressed()).hasLength(2); assertThat(e.getSuppressed()[0]).isInstanceOf(IOException.class); - assertThat(e.getSuppressed()[0]).hasMessageThat().isEqualTo("file3 failed"); - assertThat(Throwables.getRootCause(e)).hasMessageThat().isEqualTo("file2 failed"); + assertThat(e.getSuppressed()[0]).hasMessageThat().isAnyOf("file2 failed", "file3 failed"); + assertThat(e.getSuppressed()[1]).isInstanceOf(IOException.class); + assertThat(e.getSuppressed()[1]).hasMessageThat().isAnyOf("file2 failed", "file3 failed"); } @Test @@ -733,15 +737,18 @@ public void downloadWithDuplicateIOErrorsDoesNotSuppress() throws Exception { .addOutputFiles(OutputFile.newBuilder().setPath("file2").setDigest(digest2)) .addOutputFiles(OutputFile.newBuilder().setPath("file3").setDigest(digest3)) .build(); - IOException e = + BulkTransferException downloadException = assertThrows( - IOException.class, + BulkTransferException.class, () -> cache.download( result, execRoot, new FileOutErr(stdout, stderr), outputFilesLocker)); - assertThat(e.getSuppressed()).isEmpty(); - assertThat(Throwables.getRootCause(e)).hasMessageThat().isEqualTo("reused io exception"); + for (Throwable t : downloadException.getSuppressed()) { + assertThat(t).isInstanceOf(IOException.class); + IOException e = (IOException) t; + assertThat(Throwables.getRootCause(e)).hasMessageThat().isEqualTo("reused io exception"); + } } @Test @@ -838,7 +845,7 @@ public void testDownloadWithStdoutStderrOnFailure() throws Exception { .setStderrDigest(digestStderr) .build(); assertThrows( - IOException.class, () -> cache.download(result, execRoot, spyOutErr, outputFilesLocker)); + BulkTransferException.class, () -> cache.download(result, execRoot, spyOutErr, outputFilesLocker)); verify(spyOutErr, Mockito.times(2)).childOutErr(); verify(spyChildOutErr).clearOut(); verify(spyChildOutErr).clearErr(); diff --git a/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java b/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java index 8bd2f57041c5d6..5c421de273ac76 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java @@ -70,6 +70,7 @@ import com.google.devtools.build.lib.exec.SpawnRunner; import com.google.devtools.build.lib.exec.SpawnRunner.SpawnExecutionContext; import com.google.devtools.build.lib.exec.util.FakeOwner; +import com.google.devtools.build.lib.remote.BulkTransferException; import com.google.devtools.build.lib.remote.common.CacheNotFoundException; import com.google.devtools.build.lib.remote.common.RemoteCacheClient.ActionKey; import com.google.devtools.build.lib.remote.options.RemoteOptions; @@ -592,7 +593,7 @@ public void cacheDownloadFailureTriggersRemoteExecution() throws Exception { ActionResult cachedResult = ActionResult.newBuilder().setExitCode(0).build(); when(cache.downloadActionResult(any(ActionKey.class))).thenReturn(cachedResult); - Exception downloadFailure = new CacheNotFoundException(Digest.getDefaultInstance()); + Exception downloadFailure = new BulkTransferException(new CacheNotFoundException(Digest.getDefaultInstance())); doThrow(downloadFailure) .when(cache) .download(eq(cachedResult), any(Path.class), any(FileOutErr.class), any()); @@ -628,7 +629,7 @@ public void resultsDownloadFailureTriggersRemoteExecutionWithSkipCacheLookup() t when(executor.executeRemotely(any(ExecuteRequest.class))) .thenReturn(cachedResponse) .thenReturn(executedResponse); - Exception downloadFailure = new CacheNotFoundException(Digest.getDefaultInstance()); + Exception downloadFailure = new BulkTransferException(new CacheNotFoundException(Digest.getDefaultInstance())); doThrow(downloadFailure) .when(cache) .download(eq(cachedResult), any(Path.class), any(FileOutErr.class), any());