diff --git a/src/main/java/com/google/devtools/build/lib/bugreport/BugReport.java b/src/main/java/com/google/devtools/build/lib/bugreport/BugReport.java index d7bc9a59939447..122ef16c2d023c 100644 --- a/src/main/java/com/google/devtools/build/lib/bugreport/BugReport.java +++ b/src/main/java/com/google/devtools/build/lib/bugreport/BugReport.java @@ -38,6 +38,7 @@ import java.lang.management.ManagementFactory; import java.util.Arrays; import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.logging.Level; import javax.annotation.Nullable; import javax.annotation.concurrent.GuardedBy; @@ -61,13 +62,31 @@ public final class BugReport { private static BlazeRuntimeInterface runtime = null; + /** + * The most recent throwable that reported a crash; used only for testing purposes where the JVM + * will not halt. + */ @SuppressWarnings("StaticAssignmentOfThrowable") @GuardedBy("LOCK") @Nullable private static Throwable lastCrashingThrowable = null; + /** + * A global lock object, used with {@code synchronized} blocks to provide thread-safe access to + * {@link BugReport#lastCrashingThrowable}. + */ private static final Object LOCK = new Object(); + /** + * Flag set by {@link #handleCrash(Crash, CrashContext)} using {@link + * AtomicBoolean#compareAndSet(boolean, boolean)} to determine which thread "wins" racing crashes. + * + *

All concurrent calls to {@link #handleCrash(Crash, CrashContext)} will be logged, but only + * the thread which sets this flag {@code true} will propagate the crash to exit data and to each + * {@code BlazeModule}. + */ + private static final AtomicBoolean alreadyHandlingCrash = new AtomicBoolean(false); + private static final boolean SHOULD_NOT_SEND_BUG_REPORT_BECAUSE_IN_TEST = TestType.isInTest() && System.getenv("ENABLE_BUG_REPORT_LOGGING_IN_TEST") == null; @@ -188,7 +207,7 @@ public static void sendBugReport(@FormatString String message, Object... args) { * report} without additional arguments. */ public static void sendBugReport(Throwable exception) { - sendBugReport(exception, /*args=*/ ImmutableList.of()); + sendBugReport(exception, /* args= */ ImmutableList.of()); } /** @@ -199,12 +218,12 @@ public static void sendBugReport(Throwable exception) { * @param values Additional string values to clarify the exception. */ public static void sendBugReport(Throwable exception, List args, String... values) { - sendBugReportInternal(exception, /*isFatal=*/ true, filterArgs(args), values); + sendBugReportInternal(exception, /* isFatal= */ true, filterArgs(args), values); } /** Logs the bug report, indicating it is not a crash. */ public static void sendNonFatalBugReport(Throwable exception) { - sendBugReportInternal(exception, /*isFatal=*/ false, /*args=*/ ImmutableList.of()); + sendBugReportInternal(exception, /* isFatal= */ false, /* args= */ ImmutableList.of()); } private static void sendBugReportInternal( @@ -240,43 +259,45 @@ public static RuntimeException handleCrash(Throwable throwable, String... args) * Otherwise, for {@link CrashContext#keepAlive}, returns {@code null}, in which case the caller * is responsible for shutting down the server. */ - @SuppressWarnings("StaticAssignmentOfThrowable") + @SuppressWarnings({"StaticAssignmentOfThrowable", "LogAndThrow"}) public static void handleCrash(Crash crash, CrashContext ctx) { - int numericExitCode = crash.getDetailedExitCode().getExitCode().getNumericExitCode(); Throwable throwable = crash.getThrowable(); if (runtime != null) { runtime.fillInCrashContext(ctx); } - try { - synchronized (LOCK) { - logger.atSevere().withCause(throwable).log("Handling crash with %s", ctx); - if (TestType.isInTest()) { + boolean isFirstCrash = alreadyHandlingCrash.compareAndSet(false, true); + logger.atSevere().withCause(throwable).log( + "Handling crash (isFirstCrash = %s) with CrashContext %s", isFirstCrash, ctx); + try { + if (TestType.isInTest()) { + synchronized (LOCK) { lastCrashingThrowable = throwable; } + } - String crashMsg; - String heapDumpPath; - // Might be a wrapped OOM - the detailed exit code reflects the root cause. - boolean isOom = crash.getDetailedExitCode().getExitCode().equals(ExitCode.OOM_ERROR); - if (isOom) { - crashMsg = constructOomExitMessage(ctx.getExtraOomInfo()); - heapDumpPath = ctx.getHeapDumpPath(); - if (heapDumpPath != null) { - crashMsg += " An attempt will be made to write a heap dump to " + heapDumpPath + "."; - } - } else { - crashMsg = getProductName() + " crashed due to an internal error."; - heapDumpPath = null; + String crashMsg; + String heapDumpPath; + // Might be a wrapped OOM - the detailed exit code reflects the root cause. + boolean isOom = crash.getDetailedExitCode().getExitCode().equals(ExitCode.OOM_ERROR); + if (isOom) { + crashMsg = constructOomExitMessage(ctx.getExtraOomInfo()); + heapDumpPath = ctx.getHeapDumpPath(); + if (heapDumpPath != null) { + crashMsg += " An attempt will be made to write a heap dump to " + heapDumpPath + "."; } - crashMsg += " Printing stack trace:\n" + Throwables.getStackTraceAsString(throwable); - ctx.getEventHandler().handle(Event.fatal(crashMsg)); - + } else { + crashMsg = getProductName() + " crashed due to an internal error."; + heapDumpPath = null; + } + crashMsg += " Printing stack trace:\n" + Throwables.getStackTraceAsString(throwable); + ctx.getEventHandler().handle(Event.fatal(crashMsg)); + if (isFirstCrash) { try { // Emit exit data before sending a bug report. Bug reports involve an RPC, and given that // we are crashing, who knows if it will complete. It's more important that we write // exit code and failure detail information so that the crash can be handled correctly. - emitExitData(crash, ctx, numericExitCode, heapDumpPath); + emitExitData(crash, ctx, heapDumpPath); // Skip sending a bug report if the crash is an OOM - attempting an RPC while out of // memory can cause issues. Also, don't try to send a bug report during a crash in a test, // it will throw itself. @@ -284,13 +305,14 @@ public static void handleCrash(Crash crash, CrashContext ctx) { sendBugReport(throwable, ctx.getArgs()); } } finally { - if (ctx.shouldHaltJvm()) { - // Avoid shutdown deadlock issues: If an application shutdown hook crashes, it will - // trigger our Blaze crash handler (this method). Calling System#exit() here, would - // therefore induce a deadlock. This call would block on the shutdown sequence - // completing, but the shutdown sequence would in turn be blocked on this thread - // finishing. Instead, exit fast via halt(). - Runtime.getRuntime().halt(numericExitCode); + try { + haltJvmIfRequested(crash, ctx); + } finally { + // Reset the global `alreadyHandlingCrash` flag while testing, so subsequent tests can + // exercise BugReport#handleCrash(). (Note that Runtime#halt() throws in tests.) + if (TestType.isInTest()) { + alreadyHandlingCrash.set(false); + } } } } @@ -309,9 +331,7 @@ public static void handleCrash(Crash crash, CrashContext ctx) { System.err.println("Exception encountered during BugReport#handleCrash:"); t.printStackTrace(System.err); } finally { - if (ctx.shouldHaltJvm()) { - Runtime.getRuntime().halt(numericExitCode); - } + haltJvmIfRequested(crash, ctx); } if (!ctx.shouldHaltJvm()) { return; @@ -320,14 +340,26 @@ public static void handleCrash(Crash crash, CrashContext ctx) { throw new IllegalStateException("Should have halted", throwable); } + private static void haltJvmIfRequested(Crash crash, CrashContext ctx) { + if (ctx.shouldHaltJvm()) { + // Avoid shutdown deadlock issues: If an application shutdown hook crashes, it will + // trigger our Blaze crash handler (this method). Calling System#exit() here, would + // therefore induce a deadlock. This call would block on the shutdown sequence + // completing, but the shutdown sequence would in turn be blocked on this thread + // finishing. Instead, exit fast via halt(). + Runtime.getRuntime().halt(crash.getDetailedExitCode().getExitCode().getNumericExitCode()); + } + } + /** * Writes exit status files, dumps heap if requested, and calls {@link * BlazeRuntimeInterface#cleanUpForCrash}. */ - private static void emitExitData( - Crash crash, CrashContext ctx, int numericExitCode, @Nullable String heapDumpPath) { + private static void emitExitData(Crash crash, CrashContext ctx, @Nullable String heapDumpPath) { // Writing the exit code status file is only necessary if we are halting. Otherwise, the // caller is responsible for an orderly shutdown with the proper exit code. + DetailedExitCode detailedExitCode = crash.getDetailedExitCode(); + int numericExitCode = detailedExitCode.getExitCode().getNumericExitCode(); if (ctx.shouldHaltJvm()) { if (CustomExitCodePublisher.maybeWriteExitStatusFile(numericExitCode)) { logger.atInfo().log("Wrote exit status file."); @@ -337,7 +369,7 @@ private static void emitExitData( } if (CustomFailureDetailPublisher.maybeWriteFailureDetailFile( - crash.getDetailedExitCode().getFailureDetail())) { + detailedExitCode.getFailureDetail())) { logger.atInfo().log("Wrote failure detail file."); } else { logger.atWarning().log("Did not write failure detail file; check stderr for errors."); @@ -354,7 +386,7 @@ private static void emitExitData( } if (runtime != null) { - runtime.cleanUpForCrash(crash.getDetailedExitCode()); + runtime.cleanUpForCrash(detailedExitCode); logger.atInfo().log("Cleaned up runtime."); } else { logger.atInfo().log("No runtime to clean."); @@ -372,7 +404,7 @@ private static void dumpHeap(String path) throws IOException { ManagementFactory.getPlatformMBeanServer(), "com.sun.management:type=HotSpotDiagnostic", HotSpotDiagnosticMXBean.class); - mxBean.dumpHeap(path, /*live=*/ true); + mxBean.dumpHeap(path, /* live= */ true); } /** diff --git a/src/test/java/com/google/devtools/build/lib/bugreport/BUILD b/src/test/java/com/google/devtools/build/lib/bugreport/BUILD index 620cde1c11a371..b8fbc97ef04f60 100644 --- a/src/test/java/com/google/devtools/build/lib/bugreport/BUILD +++ b/src/test/java/com/google/devtools/build/lib/bugreport/BUILD @@ -27,6 +27,8 @@ java_library( "//src/main/java/com/google/devtools/build/lib/util:exit_code", "//src/main/java/com/google/devtools/build/lib/util:logging", "//src/main/protobuf:failure_details_java_proto", + "//src/test/java/com/google/devtools/build/lib/testutil:TestThread", + "//src/test/java/com/google/devtools/build/lib/testutil:TestUtils", "//third_party:guava", "//third_party:guava-testlib", "//third_party:junit4", diff --git a/src/test/java/com/google/devtools/build/lib/bugreport/BugReportTest.java b/src/test/java/com/google/devtools/build/lib/bugreport/BugReportTest.java index a61662de2ce0da..238ac28104e22b 100644 --- a/src/test/java/com/google/devtools/build/lib/bugreport/BugReportTest.java +++ b/src/test/java/com/google/devtools/build/lib/bugreport/BugReportTest.java @@ -16,6 +16,8 @@ import static com.google.common.truth.Truth.assertThat; import static com.google.common.util.concurrent.Futures.immediateFuture; +import static com.google.common.util.concurrent.Uninterruptibles.awaitUninterruptibly; +import static java.util.concurrent.TimeUnit.SECONDS; import static org.junit.Assert.assertThrows; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.doAnswer; @@ -34,6 +36,8 @@ import com.google.devtools.build.lib.server.FailureDetails; import com.google.devtools.build.lib.server.FailureDetails.Crash.Code; import com.google.devtools.build.lib.server.FailureDetails.FailureDetail; +import com.google.devtools.build.lib.testutil.TestThread; +import com.google.devtools.build.lib.testutil.TestUtils; import com.google.devtools.build.lib.util.CrashFailureDetails; import com.google.devtools.build.lib.util.CustomExitCodePublisher; import com.google.devtools.build.lib.util.CustomFailureDetailPublisher; @@ -48,6 +52,7 @@ import java.nio.file.Path; import java.security.Permission; import java.util.Arrays; +import java.util.concurrent.CountDownLatch; import java.util.logging.Level; import java.util.logging.LogRecord; import java.util.logging.Logger; @@ -135,17 +140,17 @@ Throwable createThrowable() { private enum ExceptionType { FATAL( new RuntimeException("fatal exception"), - /*isFatal=*/ true, + /* isFatal= */ true, Level.SEVERE, "myProductName crashed with args: arg foo"), NONFATAL( new IllegalStateException("bug report"), - /*isFatal=*/ false, + /* isFatal= */ false, Level.WARNING, "myProductName had a non fatal error with args: arg foo"), OOM( new OutOfMemoryError("Java heap space"), - /*isFatal=*/ true, + /* isFatal= */ true, Level.SEVERE, "myProductName OOMError: arg foo"); @@ -219,6 +224,67 @@ public void logException(@TestParameter ExceptionType exceptionType) { assertThat(got.getLevel()).isEqualTo(exceptionType.level); } + /** + * Regression test verifying that {@link BugReport#handleCrash(Crash, CrashContext)} is re-entrant + * and can be called from multiple threads even if the first caller is blocking. + */ + @Test + public void handleCrash_reentrant(@TestParameter CrashType crashType) + throws InterruptedException { + ////// Arrange: + // A separate thread will block until `firstCrashWaitsLatch` is ready, then crash using the + // `crashType` test parameter. + CountDownLatch firstCrashStartedReportingLatch = new CountDownLatch(1); + CountDownLatch firstCrashWaitsLatch = new CountDownLatch(1); + doAnswer( + (inv) -> { + firstCrashStartedReportingLatch.countDown(); + awaitUninterruptibly(firstCrashWaitsLatch); + return null; + }) + .when(mockRuntime) + .cleanUpForCrash(any()); + + Throwable t = crashType.createThrowable(); + TestThread blockingCrashThread = + new TestThread( + () -> { + SecurityException e = + assertThrows(SecurityException.class, () -> BugReport.handleCrash(t)); + if (e instanceof ExitException) { + int code = ((ExitException) e).code; + assertThat(code).isEqualTo(crashType.expectedExitCode.getNumericExitCode()); + } + }); + blockingCrashThread.start(); + // Wait for the TestThread to start, call BugReport#handleCrash(), and call + // BlazeRuntimeInterface#cleanUpForCrash(). + assertThat(firstCrashStartedReportingLatch.await(TestUtils.WAIT_TIMEOUT_SECONDS, SECONDS)) + .isTrue(); + assertThat(blockingCrashThread.isAlive()).isTrue(); + + ////// Act: + // Crash on the main test thread while the first crash is still blocking forever. + Throwable secondThreadCrash = new UnsupportedOperationException("Second thread crashing."); + SecurityException secondThreadSecurityException = + assertThrows(SecurityException.class, () -> BugReport.handleCrash(secondThreadCrash)); + + ////// Assert: + if (secondThreadSecurityException instanceof ExitException) { + int code = ((ExitException) secondThreadSecurityException).code; + assertThat(code).isEqualTo(ExitCode.BLAZE_INTERNAL_ERROR.getNumericExitCode()); + } + // Verify the latest exception stored is the main-thread crashing throwable. + assertThat(BugReport.getAndResetLastCrashingThrowableIfInTest()) + .isSameInstanceAs(secondThreadCrash); + + // Unblock the first crashing thread and verify that it completes. Asserting its state will + // ensure the test fails if the wrong exit code is returned to the first call to handleCrash(). + firstCrashWaitsLatch.countDown(); + blockingCrashThread.joinAndAssertState(TestUtils.WAIT_TIMEOUT_MILLISECONDS); + verify(mockRuntime).cleanUpForCrash(any()); + } + @Test public void convenienceMethod(@TestParameter CrashType crashType) throws Exception { Throwable t = crashType.createThrowable(); @@ -401,4 +467,3 @@ public void checkExit(int code) { public void checkPermission(Permission p) {} // Allow everything else. } } -