From 96bb0ee4829d0eb219baf723436fbfad0e6a28fb Mon Sep 17 00:00:00 2001 From: Mike McLaughlin Date: Fri, 13 Aug 2021 18:02:05 -0700 Subject: [PATCH 1/4] Fix crashreport for stack overflow Issue: https://github.com/dotnet/runtime/issues/57032 The stack overflow managed exception wasn't been set/written out. Changed the "unmanaged_frames" value to "stack_frames" because it includes both native and managed frames. Aggregated the repeated stack frames adding a "repeated"/repeated_frames value/array for the time of times the sequence was repeated. Looks like this: { "repeated" : "0x1542", "repeated_frames" : [ { "is_managed" : "true", "module_address" : "0x10e402000", "stack_pointer" : "0x7000045ce020", "native_address" : "0x11b9ee8c9", "native_offset" : "0x29", "token" : "0x600006f", "il_offset" : "0x0", "method_name" : "Macson.Client.Diagnostics.CrashyClass.Foo2()", "timestamp" : "0xa8561820", "sizeofimage" : "0xe000", "filename" : "Macson.Client.dll", "guid" : "49cd869a682942bc95c0c34ca206c61d" }, { "is_managed" : "true", "module_address" : "0x10e402000", "stack_pointer" : "0x7000045ce040", "native_address" : "0x11b9ee879", "native_offset" : "0x29", "token" : "0x6000070", "il_offset" : "0x0", "method_name" : "Macson.Client.Diagnostics.CrashyClass.Foo1()", ... }, ] } --- .../debug/createdump/crashreportwriter.cpp | 19 ++++- src/coreclr/debug/createdump/threadinfo.cpp | 80 +++++++++++++++++-- src/coreclr/debug/createdump/threadinfo.h | 14 +++- .../debug/createdump/threadinfomac.cpp | 5 ++ .../debug/createdump/threadinfounix.cpp | 7 +- 5 files changed, 109 insertions(+), 16 deletions(-) diff --git a/src/coreclr/debug/createdump/crashreportwriter.cpp b/src/coreclr/debug/createdump/crashreportwriter.cpp index e72651e4f824b..4552f10d328fe 100644 --- a/src/coreclr/debug/createdump/crashreportwriter.cpp +++ b/src/coreclr/debug/createdump/crashreportwriter.cpp @@ -148,12 +148,23 @@ CrashReportWriter::WriteCrashReport() WriteValue64("BP", thread->GetFramePointer()); CloseObject(); // ctx - OpenArray("unmanaged_frames"); - for (const StackFrame& frame : thread->StackFrames()) + OpenArray("stack_frames"); + for (std::set::iterator iterator = thread->StackFrames().begin(); iterator != thread->StackFrames().end(); ++iterator) { - WriteStackFrame(frame); + if (thread->IsBeginRepeat(iterator)) + { + OpenObject(); + WriteValue32("repeated", thread->NumRepeatedFrames()); + OpenArray("repeated_frames"); + } + if (thread->IsEndRepeat(iterator)) + { + CloseArray(); // repeated_frames + CloseObject(); + } + WriteStackFrame(*iterator); } - CloseArray(); // unmanaged_frames + CloseArray(); // stack_frames CloseObject(); } CloseArray(); // threads diff --git a/src/coreclr/debug/createdump/threadinfo.cpp b/src/coreclr/debug/createdump/threadinfo.cpp index 2107c6c1bafb7..18d0dfeb6f3aa 100644 --- a/src/coreclr/debug/createdump/threadinfo.cpp +++ b/src/coreclr/debug/createdump/threadinfo.cpp @@ -141,7 +141,12 @@ ThreadInfo::UnwindThread(IXCLRDataProcess* pClrDataProcess, ISOSDacInterface* pS m_managed = true; ReleaseHolder pException; - if (SUCCEEDED(pTask->GetCurrentExceptionState(&pException))) + HRESULT hr = pTask->GetCurrentExceptionState(&pException); + if (FAILED(hr)) + { + hr = pTask->GetLastExceptionState(&pException); + } + if (SUCCEEDED(hr)) { TRACE("Unwind: found managed exception\n"); @@ -270,18 +275,77 @@ ThreadInfo::GatherStackFrames(CONTEXT* pContext, IXCLRDataStackWalk* pStackwalk) AddStackFrame(frame); } +// This function deals with two types of frames: duplicate stack frames (SP is equal) and repeated frames (IP is +// equal) because of a stack overflow. +// +// The list of constraints: +// +// 1) The StackFrame is immutable i.e. can't add some kind of repeat count to the frame. Making it mutable is big hassle. +// 2) The native unwinding can repeat the same frame SP/IP. These frames are not counted as repeated stack overflow ones. +// 3) Only add the repeated stack overflow frames once to frames set. This saves time and memory. void ThreadInfo::AddStackFrame(const StackFrame& frame) { - std::pair::iterator,bool> result = m_frames.insert(frame); - if (result.second) + // This filters out the duplicate stack frames that are the result the native + // unwinding happening between each managed frame. If the SP matches a frame + // already in the set, skip it. + const std::set::iterator& found = m_frames.find(frame); + if (found == m_frames.end()) { + // Aggregated the repeated stack frames only for stack overflow exceptions + if (m_exceptionHResult == STACK_OVERFLOW_EXCEPTION) + { + // Check for repeats through all the stack frames so far until we find one + if (m_beginRepeat == m_frames.end()) + { + for (std::set::iterator iterator = m_frames.begin(); iterator != m_frames.end(); ++iterator) + { + if (frame.InstructionPointer() == iterator->InstructionPointer()) + { + m_repeatedFrames++; + m_beginRepeat = iterator; + TRACE("Unwind: begin repeat sp %p ip %p\n", (void*)frame.StackPointer(), (void*)frame.InstructionPointer()); + return; + } + } + } + + // Check for repeats until we stop find them + if (m_endRepeat == m_frames.end()) + { + for (std::set::iterator iterator = m_beginRepeat; iterator != m_endRepeat; ++iterator) + { + if (frame.InstructionPointer() == iterator->InstructionPointer()) + { + m_repeatedFrames++; + return; + } + } + } + } + + // Add the non-duplicate and (if stack overflow) non-repeating frames to set + std::pair::iterator, bool> result = m_frames.insert(frame); + assert(result.second); + TRACE("Unwind: sp %p ip %p off %08x mod %p%c\n", - (void*)frame.StackPointer(), - (void*)frame.InstructionPointer(), - frame.NativeOffset(), - (void*)frame.ModuleAddress(), - frame.IsManaged() ? '*' : ' '); + (void*)frame.StackPointer(), (void*)frame.InstructionPointer(), frame.NativeOffset(), (void*)frame.ModuleAddress(), frame.IsManaged() ? '*' : ' '); + + // Don't start tracking the end of the repeated frames until there is a start + if (m_beginRepeat != m_frames.end() && m_endRepeat == m_frames.end()) + { + TRACE("Unwind: end repeat sp %p ip %p\n", (void*)frame.StackPointer(), (void*)frame.InstructionPointer()); + m_endRepeat = result.first; + + // Count the number of frames in the repeating sequence and calculate how many times the sequence was repeated + int framesRepeated = 0; + for (std::set::iterator iterator = m_beginRepeat; iterator != m_endRepeat; ++iterator) + { + framesRepeated++; + } + // The total number of individually repeated frames has to be greater than the number of frames in the repeating sequence + m_repeatedFrames = framesRepeated > 0 && m_repeatedFrames >= framesRepeated ? (m_repeatedFrames / framesRepeated) + 1 : 0; + } } } diff --git a/src/coreclr/debug/createdump/threadinfo.h b/src/coreclr/debug/createdump/threadinfo.h index 7ce0df5f1ec18..c0ff905be4a94 100644 --- a/src/coreclr/debug/createdump/threadinfo.h +++ b/src/coreclr/debug/createdump/threadinfo.h @@ -33,6 +33,8 @@ struct user_vfpregs_struct } __attribute__((__packed__)); #endif +#define STACK_OVERFLOW_EXCEPTION 0x800703e9 + class ThreadInfo { private: @@ -43,8 +45,11 @@ class ThreadInfo bool m_managed; // if true, thread has managed code running uint64_t m_exceptionObject; // exception object address std::string m_exceptionType; // exception type - int32_t m_exceptionHResult; // exception HRESULT + uint32_t m_exceptionHResult; // exception HRESULT std::set m_frames; // stack frames + int m_repeatedFrames; // number of repeated frames + std::set::iterator m_beginRepeat; // beginning of stack overflow repeated frame sequence + std::set::iterator m_endRepeat; // end of repeated frame sequence #ifdef __APPLE__ mach_port_t m_port; // MacOS thread port @@ -88,9 +93,12 @@ class ThreadInfo inline bool IsManaged() const { return m_managed; } inline uint64_t ManagedExceptionObject() const { return m_exceptionObject; } - inline int32_t ManagedExceptionHResult() const { return m_exceptionHResult; } + inline uint32_t ManagedExceptionHResult() const { return m_exceptionHResult; } inline std::string ManagedExceptionType() const { return m_exceptionType; } - inline const std::set StackFrames() const { return m_frames; } + inline const std::set& StackFrames() const { return m_frames; } + inline int ThreadInfo::NumRepeatedFrames() const { return m_repeatedFrames; } + inline bool IsBeginRepeat(std::set::iterator& iterator) const { return m_repeatedFrames > 0 && iterator == m_beginRepeat; } + inline bool IsEndRepeat(std::set::iterator& iterator) const { return m_repeatedFrames > 0 && iterator == m_endRepeat; } #ifdef __APPLE__ #if defined(__x86_64__) diff --git a/src/coreclr/debug/createdump/threadinfomac.cpp b/src/coreclr/debug/createdump/threadinfomac.cpp index 92b9339088fca..918f0ae2310b5 100644 --- a/src/coreclr/debug/createdump/threadinfomac.cpp +++ b/src/coreclr/debug/createdump/threadinfomac.cpp @@ -6,11 +6,16 @@ ThreadInfo::ThreadInfo(CrashInfo& crashInfo, pid_t tid, mach_port_t port) : m_crashInfo(crashInfo), m_tid(tid), + m_ppid(0), + m_tgid(0), m_managed(false), m_exceptionObject(0), m_exceptionHResult(0), + m_repeatedFrames(0), m_port(port) { + m_beginRepeat = m_frames.end(); + m_endRepeat = m_frames.end(); } ThreadInfo::~ThreadInfo() diff --git a/src/coreclr/debug/createdump/threadinfounix.cpp b/src/coreclr/debug/createdump/threadinfounix.cpp index 90a4c8ab9ffe5..856dfbb299379 100644 --- a/src/coreclr/debug/createdump/threadinfounix.cpp +++ b/src/coreclr/debug/createdump/threadinfounix.cpp @@ -25,10 +25,15 @@ bool GetStatus(pid_t pid, pid_t* ppid, pid_t* tgid, std::string* name); ThreadInfo::ThreadInfo(CrashInfo& crashInfo, pid_t tid) : m_crashInfo(crashInfo), m_tid(tid), + m_ppid(0), + m_tgid(0), m_managed(false), m_exceptionObject(0), - m_exceptionHResult(0) + m_exceptionHResult(0), + m_repeatedFrames(0) { + m_beginRepeat = m_frames.end(); + m_endRepeat = m_frames.end(); } ThreadInfo::~ThreadInfo() From 71638b6c8254c79fe42e4d12d7e0c384de1de464 Mon Sep 17 00:00:00 2001 From: Mike McLaughlin Date: Sun, 15 Aug 2021 13:36:09 -0700 Subject: [PATCH 2/4] Update createdump doc --- .../coreclr/botr/xplat-minidump-generation.md | 36 ++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/docs/design/coreclr/botr/xplat-minidump-generation.md b/docs/design/coreclr/botr/xplat-minidump-generation.md index 20380fe222010..0ebbcbba59184 100644 --- a/docs/design/coreclr/botr/xplat-minidump-generation.md +++ b/docs/design/coreclr/botr/xplat-minidump-generation.md @@ -60,8 +60,9 @@ Environment variables supported: - `COMPlus_DbgEnableMiniDump`: if set to "1", enables this core dump generation. The default is NOT to generate a dump. - `COMPlus_DbgMiniDumpType`: See below. Default is "2" MiniDumpWithPrivateReadWriteMemory. -- `COMPlus_DbgMiniDumpName`: if set, use as the template to create the dump path and file name. The pid can be placed in the name with %d. The default is _/tmp/coredump.%d_. +- `COMPlus_DbgMiniDumpName`: if set, use as the template to create the dump path and file name. See "Dump name formatting" for how the dump name can be formatted. The default is _/tmp/coredump.%p_. - `COMPlus_CreateDumpDiagnostics`: if set to "1", enables the _createdump_ utilities diagnostic messages (TRACE macro). +- `COMPlus_EnableCrashReport`: In .NET 6.0 or greater, if set to "1", createdump also generates a json formatted crash report which includes information about the threads and stack frames of the crashing application. The crash report name is the dump path/name with _.crashreport.json_ appended. COMPlus_DbgMiniDumpType values: @@ -79,15 +80,23 @@ COMPlus_DbgMiniDumpType values: The createdump utility can also be run from the command line on arbitrary .NET Core processes. The type of dump can be controlled with the below command switches. The default is a "minidump" which contains the majority the memory and managed state needed. Unless you have ptrace (CAP_SYS_PTRACE) administrative privilege, you need to run with sudo or su. The same as if you were attaching with lldb or other native debugger. -`sudo createdump ` - - -f, --name - dump path and file name. The %p, %e, %h %t format characters are supported. The default is '/tmp/coredump.%p' - -n, --normal - create minidump. - -h, --withheap - create minidump with heap (default). - -t, --triage - create triage minidump. - -u, --full - create full core dump. - -d, --diag - enable diagnostic messages. - +``` +createdump [options] pid +-f, --name - dump path and file name. The default is '/tmp/coredump.%p'. These specifiers are substituted with following values: + %p PID of dumped process. + %e The process executable filename. + %h Hostname return by gethostname(). + %t Time of dump, expressed as seconds since the Epoch, 1970-01-01 00:00:00 +0000 (UTC). +-n, --normal - create minidump. +-h, --withheap - create minidump with heap (default). +-t, --triage - create triage minidump. +-u, --full - create full core dump. +-d, --diag - enable diagnostic messages. +-v, --verbose - enable verbose diagnostic messages. +--crashreport - write crash report file. +--crashthread - the thread id of the crashing thread. +--signal - the signal code of the crash. +``` **Dump name formatting** @@ -105,10 +114,3 @@ As of .NET 5.0, the following subset of the core pattern (see [core](https://man The test plan is to modify the SOS tests in the (still) private debuggertests repo to trigger and use the core minidumps generated. Debugging managed core dumps on Linux is not supported by _mdbg_ at this time until we have a ELF core dump reader so only the SOS tests (which use _lldb_ on Linux) will be modified. # Open Issues # - -- May need more than just the pid for decorating dump names for docker containers because I think the _pid_ is always 1. -- Do we need all the memory mappings from `/proc/$pid/maps` in the PT\_LOAD sections even though the memory is not actually in the dump? They have a file offset/size of 0. Full dumps generated by the system or _gdb_ do have these un-backed regions. -- There is no way to get the signal number, etc. that causes the abort from the _createdump_ utility using _ptrace_ or a /proc file. It would have to be passed from CoreCLR on the command line. -- Do we need the "dynamic" sections of each shared module in the core dump? It is part of the "link_map" entry enumerated when gathering the _DSO_ information. -- There may be more versioning and/or build id information needed to be added to the dump. -- It is unclear exactly what cases stack overflow does not get control in the signal handler and when the OS just aborts the process. From 86b9c4ad7f62730ce68d4f5912a9458eed11971c Mon Sep 17 00:00:00 2001 From: Mike McLaughlin Date: Mon, 16 Aug 2021 05:03:55 -0700 Subject: [PATCH 3/4] Fix Windows triage dump flags --- src/coreclr/debug/createdump/main.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/coreclr/debug/createdump/main.cpp b/src/coreclr/debug/createdump/main.cpp index 4ed6366d4675a..f91a81ca66ccb 100644 --- a/src/coreclr/debug/createdump/main.cpp +++ b/src/coreclr/debug/createdump/main.cpp @@ -95,10 +95,13 @@ int __cdecl main(const int argc, const char* argv[]) { dumpType = "triage minidump"; minidumpType = (MINIDUMP_TYPE)(MiniDumpFilterTriage | - MiniDumpWithDataSegs | - MiniDumpWithHandleData | + MiniDumpIgnoreInaccessibleMemory | + MiniDumpWithoutOptionalData | + MiniDumpWithProcessThreadData | MiniDumpFilterModulePaths | - MiniDumpWithThreadInfo); + MiniDumpWithUnloadedModules | + MiniDumpFilterMemory | + MiniDumpWithHandleData); } else if ((strcmp(*argv, "-u") == 0) || (strcmp(*argv, "--full") == 0)) { From 3009764ba06303cedb8a55df2a20bf615e7e5163 Mon Sep 17 00:00:00 2001 From: Mike McLaughlin Date: Tue, 17 Aug 2021 09:58:35 -0700 Subject: [PATCH 4/4] Code review feedback --- docs/design/coreclr/botr/xplat-minidump-generation.md | 2 -- src/coreclr/debug/createdump/crashreportwriter.cpp | 2 +- src/coreclr/debug/createdump/threadinfo.cpp | 6 +++--- src/coreclr/debug/createdump/threadinfo.h | 10 +++++----- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/docs/design/coreclr/botr/xplat-minidump-generation.md b/docs/design/coreclr/botr/xplat-minidump-generation.md index 0ebbcbba59184..d2bfeb2092186 100644 --- a/docs/design/coreclr/botr/xplat-minidump-generation.md +++ b/docs/design/coreclr/botr/xplat-minidump-generation.md @@ -112,5 +112,3 @@ As of .NET 5.0, the following subset of the core pattern (see [core](https://man # Testing # The test plan is to modify the SOS tests in the (still) private debuggertests repo to trigger and use the core minidumps generated. Debugging managed core dumps on Linux is not supported by _mdbg_ at this time until we have a ELF core dump reader so only the SOS tests (which use _lldb_ on Linux) will be modified. - -# Open Issues # diff --git a/src/coreclr/debug/createdump/crashreportwriter.cpp b/src/coreclr/debug/createdump/crashreportwriter.cpp index 4552f10d328fe..1fb0865cf5d5f 100644 --- a/src/coreclr/debug/createdump/crashreportwriter.cpp +++ b/src/coreclr/debug/createdump/crashreportwriter.cpp @@ -149,7 +149,7 @@ CrashReportWriter::WriteCrashReport() CloseObject(); // ctx OpenArray("stack_frames"); - for (std::set::iterator iterator = thread->StackFrames().begin(); iterator != thread->StackFrames().end(); ++iterator) + for (auto iterator = thread->StackFrames().cbegin(); iterator != thread->StackFrames().cend(); ++iterator) { if (thread->IsBeginRepeat(iterator)) { diff --git a/src/coreclr/debug/createdump/threadinfo.cpp b/src/coreclr/debug/createdump/threadinfo.cpp index 18d0dfeb6f3aa..82509f5275065 100644 --- a/src/coreclr/debug/createdump/threadinfo.cpp +++ b/src/coreclr/debug/createdump/threadinfo.cpp @@ -298,7 +298,7 @@ ThreadInfo::AddStackFrame(const StackFrame& frame) // Check for repeats through all the stack frames so far until we find one if (m_beginRepeat == m_frames.end()) { - for (std::set::iterator iterator = m_frames.begin(); iterator != m_frames.end(); ++iterator) + for (auto iterator = m_frames.cbegin(); iterator != m_frames.cend(); ++iterator) { if (frame.InstructionPointer() == iterator->InstructionPointer()) { @@ -313,7 +313,7 @@ ThreadInfo::AddStackFrame(const StackFrame& frame) // Check for repeats until we stop find them if (m_endRepeat == m_frames.end()) { - for (std::set::iterator iterator = m_beginRepeat; iterator != m_endRepeat; ++iterator) + for (auto iterator = m_beginRepeat; iterator != m_endRepeat; ++iterator) { if (frame.InstructionPointer() == iterator->InstructionPointer()) { @@ -339,7 +339,7 @@ ThreadInfo::AddStackFrame(const StackFrame& frame) // Count the number of frames in the repeating sequence and calculate how many times the sequence was repeated int framesRepeated = 0; - for (std::set::iterator iterator = m_beginRepeat; iterator != m_endRepeat; ++iterator) + for (auto iterator = m_beginRepeat; iterator != m_endRepeat; ++iterator) { framesRepeated++; } diff --git a/src/coreclr/debug/createdump/threadinfo.h b/src/coreclr/debug/createdump/threadinfo.h index c0ff905be4a94..4142b55e333dd 100644 --- a/src/coreclr/debug/createdump/threadinfo.h +++ b/src/coreclr/debug/createdump/threadinfo.h @@ -48,8 +48,8 @@ class ThreadInfo uint32_t m_exceptionHResult; // exception HRESULT std::set m_frames; // stack frames int m_repeatedFrames; // number of repeated frames - std::set::iterator m_beginRepeat; // beginning of stack overflow repeated frame sequence - std::set::iterator m_endRepeat; // end of repeated frame sequence + std::set::const_iterator m_beginRepeat; // beginning of stack overflow repeated frame sequence + std::set::const_iterator m_endRepeat; // end of repeated frame sequence #ifdef __APPLE__ mach_port_t m_port; // MacOS thread port @@ -96,9 +96,9 @@ class ThreadInfo inline uint32_t ManagedExceptionHResult() const { return m_exceptionHResult; } inline std::string ManagedExceptionType() const { return m_exceptionType; } inline const std::set& StackFrames() const { return m_frames; } - inline int ThreadInfo::NumRepeatedFrames() const { return m_repeatedFrames; } - inline bool IsBeginRepeat(std::set::iterator& iterator) const { return m_repeatedFrames > 0 && iterator == m_beginRepeat; } - inline bool IsEndRepeat(std::set::iterator& iterator) const { return m_repeatedFrames > 0 && iterator == m_endRepeat; } + inline int NumRepeatedFrames() const { return m_repeatedFrames; } + inline bool IsBeginRepeat(std::set::const_iterator& iterator) const { return m_repeatedFrames > 0 && iterator == m_beginRepeat; } + inline bool IsEndRepeat(std::set::const_iterator& iterator) const { return m_repeatedFrames > 0 && iterator == m_endRepeat; } #ifdef __APPLE__ #if defined(__x86_64__)