diff --git a/src/main/java/com/google/devtools/build/lib/bugreport/BugReport.java b/src/main/java/com/google/devtools/build/lib/bugreport/BugReport.java
index d7bc9a59939447..122ef16c2d023c 100644
--- a/src/main/java/com/google/devtools/build/lib/bugreport/BugReport.java
+++ b/src/main/java/com/google/devtools/build/lib/bugreport/BugReport.java
@@ -38,6 +38,7 @@
import java.lang.management.ManagementFactory;
import java.util.Arrays;
import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
import java.util.logging.Level;
import javax.annotation.Nullable;
import javax.annotation.concurrent.GuardedBy;
@@ -61,13 +62,31 @@ public final class BugReport {
private static BlazeRuntimeInterface runtime = null;
+ /**
+ * The most recent throwable that reported a crash; used only for testing purposes where the JVM
+ * will not halt.
+ */
@SuppressWarnings("StaticAssignmentOfThrowable")
@GuardedBy("LOCK")
@Nullable
private static Throwable lastCrashingThrowable = null;
+ /**
+ * A global lock object, used with {@code synchronized} blocks to provide thread-safe access to
+ * {@link BugReport#lastCrashingThrowable}.
+ */
private static final Object LOCK = new Object();
+ /**
+ * Flag set by {@link #handleCrash(Crash, CrashContext)} using {@link
+ * AtomicBoolean#compareAndSet(boolean, boolean)} to determine which thread "wins" racing crashes.
+ *
+ *
All concurrent calls to {@link #handleCrash(Crash, CrashContext)} will be logged, but only
+ * the thread which sets this flag {@code true} will propagate the crash to exit data and to each
+ * {@code BlazeModule}.
+ */
+ private static final AtomicBoolean alreadyHandlingCrash = new AtomicBoolean(false);
+
private static final boolean SHOULD_NOT_SEND_BUG_REPORT_BECAUSE_IN_TEST =
TestType.isInTest() && System.getenv("ENABLE_BUG_REPORT_LOGGING_IN_TEST") == null;
@@ -188,7 +207,7 @@ public static void sendBugReport(@FormatString String message, Object... args) {
* report} without additional arguments.
*/
public static void sendBugReport(Throwable exception) {
- sendBugReport(exception, /*args=*/ ImmutableList.of());
+ sendBugReport(exception, /* args= */ ImmutableList.of());
}
/**
@@ -199,12 +218,12 @@ public static void sendBugReport(Throwable exception) {
* @param values Additional string values to clarify the exception.
*/
public static void sendBugReport(Throwable exception, List args, String... values) {
- sendBugReportInternal(exception, /*isFatal=*/ true, filterArgs(args), values);
+ sendBugReportInternal(exception, /* isFatal= */ true, filterArgs(args), values);
}
/** Logs the bug report, indicating it is not a crash. */
public static void sendNonFatalBugReport(Throwable exception) {
- sendBugReportInternal(exception, /*isFatal=*/ false, /*args=*/ ImmutableList.of());
+ sendBugReportInternal(exception, /* isFatal= */ false, /* args= */ ImmutableList.of());
}
private static void sendBugReportInternal(
@@ -240,43 +259,45 @@ public static RuntimeException handleCrash(Throwable throwable, String... args)
* Otherwise, for {@link CrashContext#keepAlive}, returns {@code null}, in which case the caller
* is responsible for shutting down the server.
*/
- @SuppressWarnings("StaticAssignmentOfThrowable")
+ @SuppressWarnings({"StaticAssignmentOfThrowable", "LogAndThrow"})
public static void handleCrash(Crash crash, CrashContext ctx) {
- int numericExitCode = crash.getDetailedExitCode().getExitCode().getNumericExitCode();
Throwable throwable = crash.getThrowable();
if (runtime != null) {
runtime.fillInCrashContext(ctx);
}
- try {
- synchronized (LOCK) {
- logger.atSevere().withCause(throwable).log("Handling crash with %s", ctx);
- if (TestType.isInTest()) {
+ boolean isFirstCrash = alreadyHandlingCrash.compareAndSet(false, true);
+ logger.atSevere().withCause(throwable).log(
+ "Handling crash (isFirstCrash = %s) with CrashContext %s", isFirstCrash, ctx);
+ try {
+ if (TestType.isInTest()) {
+ synchronized (LOCK) {
lastCrashingThrowable = throwable;
}
+ }
- String crashMsg;
- String heapDumpPath;
- // Might be a wrapped OOM - the detailed exit code reflects the root cause.
- boolean isOom = crash.getDetailedExitCode().getExitCode().equals(ExitCode.OOM_ERROR);
- if (isOom) {
- crashMsg = constructOomExitMessage(ctx.getExtraOomInfo());
- heapDumpPath = ctx.getHeapDumpPath();
- if (heapDumpPath != null) {
- crashMsg += " An attempt will be made to write a heap dump to " + heapDumpPath + ".";
- }
- } else {
- crashMsg = getProductName() + " crashed due to an internal error.";
- heapDumpPath = null;
+ String crashMsg;
+ String heapDumpPath;
+ // Might be a wrapped OOM - the detailed exit code reflects the root cause.
+ boolean isOom = crash.getDetailedExitCode().getExitCode().equals(ExitCode.OOM_ERROR);
+ if (isOom) {
+ crashMsg = constructOomExitMessage(ctx.getExtraOomInfo());
+ heapDumpPath = ctx.getHeapDumpPath();
+ if (heapDumpPath != null) {
+ crashMsg += " An attempt will be made to write a heap dump to " + heapDumpPath + ".";
}
- crashMsg += " Printing stack trace:\n" + Throwables.getStackTraceAsString(throwable);
- ctx.getEventHandler().handle(Event.fatal(crashMsg));
-
+ } else {
+ crashMsg = getProductName() + " crashed due to an internal error.";
+ heapDumpPath = null;
+ }
+ crashMsg += " Printing stack trace:\n" + Throwables.getStackTraceAsString(throwable);
+ ctx.getEventHandler().handle(Event.fatal(crashMsg));
+ if (isFirstCrash) {
try {
// Emit exit data before sending a bug report. Bug reports involve an RPC, and given that
// we are crashing, who knows if it will complete. It's more important that we write
// exit code and failure detail information so that the crash can be handled correctly.
- emitExitData(crash, ctx, numericExitCode, heapDumpPath);
+ emitExitData(crash, ctx, heapDumpPath);
// Skip sending a bug report if the crash is an OOM - attempting an RPC while out of
// memory can cause issues. Also, don't try to send a bug report during a crash in a test,
// it will throw itself.
@@ -284,13 +305,14 @@ public static void handleCrash(Crash crash, CrashContext ctx) {
sendBugReport(throwable, ctx.getArgs());
}
} finally {
- if (ctx.shouldHaltJvm()) {
- // Avoid shutdown deadlock issues: If an application shutdown hook crashes, it will
- // trigger our Blaze crash handler (this method). Calling System#exit() here, would
- // therefore induce a deadlock. This call would block on the shutdown sequence
- // completing, but the shutdown sequence would in turn be blocked on this thread
- // finishing. Instead, exit fast via halt().
- Runtime.getRuntime().halt(numericExitCode);
+ try {
+ haltJvmIfRequested(crash, ctx);
+ } finally {
+ // Reset the global `alreadyHandlingCrash` flag while testing, so subsequent tests can
+ // exercise BugReport#handleCrash(). (Note that Runtime#halt() throws in tests.)
+ if (TestType.isInTest()) {
+ alreadyHandlingCrash.set(false);
+ }
}
}
}
@@ -309,9 +331,7 @@ public static void handleCrash(Crash crash, CrashContext ctx) {
System.err.println("Exception encountered during BugReport#handleCrash:");
t.printStackTrace(System.err);
} finally {
- if (ctx.shouldHaltJvm()) {
- Runtime.getRuntime().halt(numericExitCode);
- }
+ haltJvmIfRequested(crash, ctx);
}
if (!ctx.shouldHaltJvm()) {
return;
@@ -320,14 +340,26 @@ public static void handleCrash(Crash crash, CrashContext ctx) {
throw new IllegalStateException("Should have halted", throwable);
}
+ private static void haltJvmIfRequested(Crash crash, CrashContext ctx) {
+ if (ctx.shouldHaltJvm()) {
+ // Avoid shutdown deadlock issues: If an application shutdown hook crashes, it will
+ // trigger our Blaze crash handler (this method). Calling System#exit() here, would
+ // therefore induce a deadlock. This call would block on the shutdown sequence
+ // completing, but the shutdown sequence would in turn be blocked on this thread
+ // finishing. Instead, exit fast via halt().
+ Runtime.getRuntime().halt(crash.getDetailedExitCode().getExitCode().getNumericExitCode());
+ }
+ }
+
/**
* Writes exit status files, dumps heap if requested, and calls {@link
* BlazeRuntimeInterface#cleanUpForCrash}.
*/
- private static void emitExitData(
- Crash crash, CrashContext ctx, int numericExitCode, @Nullable String heapDumpPath) {
+ private static void emitExitData(Crash crash, CrashContext ctx, @Nullable String heapDumpPath) {
// Writing the exit code status file is only necessary if we are halting. Otherwise, the
// caller is responsible for an orderly shutdown with the proper exit code.
+ DetailedExitCode detailedExitCode = crash.getDetailedExitCode();
+ int numericExitCode = detailedExitCode.getExitCode().getNumericExitCode();
if (ctx.shouldHaltJvm()) {
if (CustomExitCodePublisher.maybeWriteExitStatusFile(numericExitCode)) {
logger.atInfo().log("Wrote exit status file.");
@@ -337,7 +369,7 @@ private static void emitExitData(
}
if (CustomFailureDetailPublisher.maybeWriteFailureDetailFile(
- crash.getDetailedExitCode().getFailureDetail())) {
+ detailedExitCode.getFailureDetail())) {
logger.atInfo().log("Wrote failure detail file.");
} else {
logger.atWarning().log("Did not write failure detail file; check stderr for errors.");
@@ -354,7 +386,7 @@ private static void emitExitData(
}
if (runtime != null) {
- runtime.cleanUpForCrash(crash.getDetailedExitCode());
+ runtime.cleanUpForCrash(detailedExitCode);
logger.atInfo().log("Cleaned up runtime.");
} else {
logger.atInfo().log("No runtime to clean.");
@@ -372,7 +404,7 @@ private static void dumpHeap(String path) throws IOException {
ManagementFactory.getPlatformMBeanServer(),
"com.sun.management:type=HotSpotDiagnostic",
HotSpotDiagnosticMXBean.class);
- mxBean.dumpHeap(path, /*live=*/ true);
+ mxBean.dumpHeap(path, /* live= */ true);
}
/**
diff --git a/src/test/java/com/google/devtools/build/lib/bugreport/BUILD b/src/test/java/com/google/devtools/build/lib/bugreport/BUILD
index 620cde1c11a371..b8fbc97ef04f60 100644
--- a/src/test/java/com/google/devtools/build/lib/bugreport/BUILD
+++ b/src/test/java/com/google/devtools/build/lib/bugreport/BUILD
@@ -27,6 +27,8 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/util:exit_code",
"//src/main/java/com/google/devtools/build/lib/util:logging",
"//src/main/protobuf:failure_details_java_proto",
+ "//src/test/java/com/google/devtools/build/lib/testutil:TestThread",
+ "//src/test/java/com/google/devtools/build/lib/testutil:TestUtils",
"//third_party:guava",
"//third_party:guava-testlib",
"//third_party:junit4",
diff --git a/src/test/java/com/google/devtools/build/lib/bugreport/BugReportTest.java b/src/test/java/com/google/devtools/build/lib/bugreport/BugReportTest.java
index a61662de2ce0da..238ac28104e22b 100644
--- a/src/test/java/com/google/devtools/build/lib/bugreport/BugReportTest.java
+++ b/src/test/java/com/google/devtools/build/lib/bugreport/BugReportTest.java
@@ -16,6 +16,8 @@
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.util.concurrent.Futures.immediateFuture;
+import static com.google.common.util.concurrent.Uninterruptibles.awaitUninterruptibly;
+import static java.util.concurrent.TimeUnit.SECONDS;
import static org.junit.Assert.assertThrows;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.doAnswer;
@@ -34,6 +36,8 @@
import com.google.devtools.build.lib.server.FailureDetails;
import com.google.devtools.build.lib.server.FailureDetails.Crash.Code;
import com.google.devtools.build.lib.server.FailureDetails.FailureDetail;
+import com.google.devtools.build.lib.testutil.TestThread;
+import com.google.devtools.build.lib.testutil.TestUtils;
import com.google.devtools.build.lib.util.CrashFailureDetails;
import com.google.devtools.build.lib.util.CustomExitCodePublisher;
import com.google.devtools.build.lib.util.CustomFailureDetailPublisher;
@@ -48,6 +52,7 @@
import java.nio.file.Path;
import java.security.Permission;
import java.util.Arrays;
+import java.util.concurrent.CountDownLatch;
import java.util.logging.Level;
import java.util.logging.LogRecord;
import java.util.logging.Logger;
@@ -135,17 +140,17 @@ Throwable createThrowable() {
private enum ExceptionType {
FATAL(
new RuntimeException("fatal exception"),
- /*isFatal=*/ true,
+ /* isFatal= */ true,
Level.SEVERE,
"myProductName crashed with args: arg foo"),
NONFATAL(
new IllegalStateException("bug report"),
- /*isFatal=*/ false,
+ /* isFatal= */ false,
Level.WARNING,
"myProductName had a non fatal error with args: arg foo"),
OOM(
new OutOfMemoryError("Java heap space"),
- /*isFatal=*/ true,
+ /* isFatal= */ true,
Level.SEVERE,
"myProductName OOMError: arg foo");
@@ -219,6 +224,67 @@ public void logException(@TestParameter ExceptionType exceptionType) {
assertThat(got.getLevel()).isEqualTo(exceptionType.level);
}
+ /**
+ * Regression test verifying that {@link BugReport#handleCrash(Crash, CrashContext)} is re-entrant
+ * and can be called from multiple threads even if the first caller is blocking.
+ */
+ @Test
+ public void handleCrash_reentrant(@TestParameter CrashType crashType)
+ throws InterruptedException {
+ ////// Arrange:
+ // A separate thread will block until `firstCrashWaitsLatch` is ready, then crash using the
+ // `crashType` test parameter.
+ CountDownLatch firstCrashStartedReportingLatch = new CountDownLatch(1);
+ CountDownLatch firstCrashWaitsLatch = new CountDownLatch(1);
+ doAnswer(
+ (inv) -> {
+ firstCrashStartedReportingLatch.countDown();
+ awaitUninterruptibly(firstCrashWaitsLatch);
+ return null;
+ })
+ .when(mockRuntime)
+ .cleanUpForCrash(any());
+
+ Throwable t = crashType.createThrowable();
+ TestThread blockingCrashThread =
+ new TestThread(
+ () -> {
+ SecurityException e =
+ assertThrows(SecurityException.class, () -> BugReport.handleCrash(t));
+ if (e instanceof ExitException) {
+ int code = ((ExitException) e).code;
+ assertThat(code).isEqualTo(crashType.expectedExitCode.getNumericExitCode());
+ }
+ });
+ blockingCrashThread.start();
+ // Wait for the TestThread to start, call BugReport#handleCrash(), and call
+ // BlazeRuntimeInterface#cleanUpForCrash().
+ assertThat(firstCrashStartedReportingLatch.await(TestUtils.WAIT_TIMEOUT_SECONDS, SECONDS))
+ .isTrue();
+ assertThat(blockingCrashThread.isAlive()).isTrue();
+
+ ////// Act:
+ // Crash on the main test thread while the first crash is still blocking forever.
+ Throwable secondThreadCrash = new UnsupportedOperationException("Second thread crashing.");
+ SecurityException secondThreadSecurityException =
+ assertThrows(SecurityException.class, () -> BugReport.handleCrash(secondThreadCrash));
+
+ ////// Assert:
+ if (secondThreadSecurityException instanceof ExitException) {
+ int code = ((ExitException) secondThreadSecurityException).code;
+ assertThat(code).isEqualTo(ExitCode.BLAZE_INTERNAL_ERROR.getNumericExitCode());
+ }
+ // Verify the latest exception stored is the main-thread crashing throwable.
+ assertThat(BugReport.getAndResetLastCrashingThrowableIfInTest())
+ .isSameInstanceAs(secondThreadCrash);
+
+ // Unblock the first crashing thread and verify that it completes. Asserting its state will
+ // ensure the test fails if the wrong exit code is returned to the first call to handleCrash().
+ firstCrashWaitsLatch.countDown();
+ blockingCrashThread.joinAndAssertState(TestUtils.WAIT_TIMEOUT_MILLISECONDS);
+ verify(mockRuntime).cleanUpForCrash(any());
+ }
+
@Test
public void convenienceMethod(@TestParameter CrashType crashType) throws Exception {
Throwable t = crashType.createThrowable();
@@ -401,4 +467,3 @@ public void checkExit(int code) {
public void checkPermission(Permission p) {} // Allow everything else.
}
}
-