From 1ac87479d8188e9e8795e08e824fe926dec020a4 Mon Sep 17 00:00:00 2001
From: Brian Neradt
Date: Fri, 2 Jan 2026 21:05:30 +0000
Subject: [PATCH] Fix false crash logs with regression tests

When traffic_server exits normally (e.g., after regression tests
complete), traffic_crashlog was incorrectly logging a crash because it
detected its parent process had terminated. This happened because
traffic_crashlog uses PR_SET_PDEATHSIG to wake up when traffic_server
exits, but it couldn't distinguish between a crash (where
crash_logger_invoke sends signal info via the pipe) and a normal exit
(where the pipe is simply closed).

This fix adds a poll() check on stdin to verify that crash data was
actually sent before logging a crash, preventing false positive crash
logs.
---
Reviewer notes (not part of the commit message):
- The readiness test keys on POLLIN alone. poll() may report POLLHUP
  together with POLLIN when the writer closed the pipe after sending
  data, so treating POLLHUP as "no crash" could discard a real crash
  report if crash_logger_invoke closes its end right after writing
  (verify against crash_logger_invoke).
- NOTE(review): this patch text was rebuilt from a whitespace-collapsed
  copy. <poll.h> is restored from the poll() usage below; <unistd.h> on
  the following context line, the 2-space indentation and the alignment
  spacing are assumptions - confirm against the tree. The index hash
  predates the POLLHUP change.

 src/traffic_crashlog/traffic_crashlog.cc | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/src/traffic_crashlog/traffic_crashlog.cc b/src/traffic_crashlog/traffic_crashlog.cc
index 9354c5f838c..68c94acb6f5 100644
--- a/src/traffic_crashlog/traffic_crashlog.cc
+++ b/src/traffic_crashlog/traffic_crashlog.cc
@@ -32,6 +32,7 @@
 #include "tscore/BaseLogFile.h"
 #include "tscore/runroot.h"
 #include "iocore/eventsystem/RecProcess.h"
+#include <poll.h>
 #include <unistd.h>
 
 static int syslog_mode = false;
@@ -171,6 +172,25 @@ main(int /* argc ATS_UNUSED */, const char **argv)
     return 0;
   }
 
+  // In wait mode, we need to verify this is a real crash by checking if crash_logger_invoke
+  // sent us signal info via the pipe. If traffic_server just exited normally, the pipe will be
+  // closed with no data, and we should exit without logging a false "crash".
+  if (wait_mode) {
+    // Use poll to check if there's data available on stdin without blocking indefinitely.
+    struct pollfd pfd;
+    pfd.fd     = STDIN_FILENO;
+    pfd.events = POLLIN;
+
+    // Wait briefly for data. If crash_logger_invoke was called, data should already be there.
+    int poll_result = poll(&pfd, 1, 100); // 100ms timeout
+
+    // Timeout or error means no crash data. POLLHUP can accompany POLLIN when the writer closed
+    // the pipe after sending data, so readiness is decided by POLLIN alone.
+    if (poll_result <= 0 || !(pfd.revents & POLLIN)) {
+      return 0;
+    }
+  }
+
   runroot_handler(argv);
   Layout::create();
   RecProcessInit(nullptr /* diags */);