diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index e4b73d0b8..024d6fd74 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -44,7 +44,7 @@ jobs:
         if: ${{ runner.os == 'Linux' }}
         run: |
           sudo apt update
-          sudo apt install -y libcurl4-openssl-dev
+          sudo apt install -y libcurl4-openssl-dev libunwind-dev
 
       - name: Benchmark
         shell: bash
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e25393bcc..5e7d2eb02 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -39,7 +39,7 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          # toolchain per runner-image availability:
+            # toolchain per runner-image availability:
             #   ubuntu 22.04
             #     GCC: 9-12
             #     clang: 11-15
@@ -140,6 +140,10 @@ jobs:
             os: macos-14
             ERROR_ON_WARNINGS: 1
             SYSTEM_VERSION_COMPAT: 0
+          - name: macOS 26 (xcode llvm)
+            os: macos-26
+            ERROR_ON_WARNINGS: 1
+            SYSTEM_VERSION_COMPAT: 0
           - name: macOS 14 (xcode llvm + universal)
             os: macos-14
             ERROR_ON_WARNINGS: 1
@@ -222,7 +226,7 @@ jobs:
         run: |
           sudo apt update
           # Install common dependencies
-          sudo apt install cmake llvm valgrind zlib1g-dev libcurl4-openssl-dev
+          sudo apt install cmake llvm valgrind zlib1g-dev libcurl4-openssl-dev libunwind-dev
           # For GCC, install both gcc-X and g++-X. For Clang, only install clang-X (includes C++ compiler)
           if [[ "$CC" == gcc-* ]]; then
             sudo apt install "${CC}" "${CXX}"
@@ -253,7 +257,7 @@ jobs:
         run: |
           sudo dpkg --add-architecture i386
           sudo apt update
-          sudo apt install cmake "${CC}-multilib" "${CXX}-multilib" zlib1g-dev:i386 libssl-dev:i386 libcurl4-openssl-dev:i386
+          sudo apt install cmake "${CC}-multilib" "${CXX}-multilib" zlib1g-dev:i386 libssl-dev:i386 libcurl4-openssl-dev:i386 libunwind-dev:i386 liblzma-dev:i386
 
       - name: Installing Alpine Linux Dependencies
         if: ${{ contains(matrix.container, 'alpine') }}
@@ -270,11 +274,6 @@ jobs:
           echo "$HOME/.dotnet" >> $GITHUB_PATH
           echo "DOTNET_ROOT=$HOME/.dotnet" >> $GITHUB_ENV
 
-        # https://github.com/actions/runner-images/issues/9491
-      - name: Decrease vm.mmap_rnd_bit to prevent ASLR ASAN issues
-        if: ${{ runner.os == 'Linux' && contains(env['RUN_ANALYZER'], 'asan') }}
-        run: sudo sysctl vm.mmap_rnd_bits=28
-
       - name: Installing CodeChecker
         if: ${{ contains(env['RUN_ANALYZER'], 'code-checker') }}
         run: sudo snap install codechecker --classic
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index fb5468074..e6fd5681d 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -37,7 +37,7 @@ jobs:
       - name: Installing Linux Dependencies
         run: |
           sudo apt update
-          sudo apt install cmake clang-14 clang-tools llvm kcov g++-12 valgrind zlib1g-dev libcurl4-openssl-dev
+          sudo apt install cmake clang-14 clang-tools llvm kcov g++-12 valgrind zlib1g-dev libcurl4-openssl-dev libunwind-dev
 
       - if: matrix.language == 'java'
         name: Setup Java Version
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab867b14d..065f0b76b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,11 @@
 
 ## Unreleased
 
+**Breaking**:
+
+- inproc(Linux): the `inproc` backend on Linux now depends on "nognu" `libunwind`. ([#1446](https://github.com/getsentry/sentry-native/pull/1446))
+- inproc: since we split `inproc` into signal-handler/UEF part and a separate handler thread, `before_send` and `on_crash` could be called from other threads than the one that crashed. While this was never part of the contract, if your code relies on this, it will no longer work. ([#1446](https://github.com/getsentry/sentry-native/pull/1446))
+
 **Fixes**:
 
 - Removed the 10-item limit per envelope for non-session data. Sessions are now limited to 100 per envelope, while other items (e.g., attachments) have no limit in amount. ([#1347](https://github.com/getsentry/sentry-native/pull/1347))
@@ -15,6 +20,12 @@
 - Add logs flush on `sentry_flush()`. ([#1434](https://github.com/getsentry/sentry-native/pull/1434))
 - Add global attributes API. These are added to all `sentry_log_X` calls. ([#1450](https://github.com/getsentry/sentry-native/pull/1450))
 
+**Fixes**:
+
+- Make the signal-handler synchronization fully atomic to fix rare race scenarios. ([#1446](https://github.com/getsentry/sentry-native/pull/1446))
+- Reintroduce an FP-based stack-walker for macOS that can start from a user context. This also makes `inproc` backend functional again on macOS 13+. ([#1446](https://github.com/getsentry/sentry-native/pull/1446))
+- Split the `inproc` signal handler (and UEF on Windows) into a safe handler part and an "unsafe" handler thread. This minimizes exposure to undefined behavior inside the signal handler. ([#1446](https://github.com/getsentry/sentry-native/pull/1446))
+
 ## 0.12.1
 
 **Fixes**:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 50beda333..0abb29572 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -144,15 +144,6 @@ if(LINUX)
 		set(CMAKE_ASM_FLAGS  "${CMAKE_ASM_FLAGS} -m32 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE")
 		set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS OFF)
 	endif()
-
-        execute_process(
-                COMMAND ${CMAKE_C_COMPILER} -dumpmachine
-                OUTPUT_VARIABLE TARGET_TRIPLET
-                OUTPUT_STRIP_TRAILING_WHITESPACE
-        )
-        if(TARGET_TRIPLET MATCHES "musl")
-                set(MUSL TRUE)
-        endif()
 endif()
 
 # CMAKE_POSITION_INDEPENDENT_CODE must be set BEFORE adding any libraries (including subprojects)
@@ -266,9 +257,12 @@ endif()
 
 if(ANDROID)
 	set(SENTRY_WITH_LIBUNWINDSTACK TRUE)
-elseif(MUSL)
+elseif(LINUX)
 	set(SENTRY_WITH_LIBUNWIND TRUE)
+elseif(APPLE)
+	set(SENTRY_WITH_LIBUNWIND_MAC TRUE)
 elseif(NOT WIN32 AND NOT PROSPERO)
+	# this should never be true, but we keep it as fallback
 	set(SENTRY_WITH_LIBBACKTRACE TRUE)
 endif()
 
@@ -621,12 +615,30 @@ endif()
 if(SENTRY_WITH_LIBUNWIND)
 	if(LINUX)
 		option(SENTRY_LIBUNWIND_SHARED "Link to shared libunwind" ${SENTRY_BUILD_SHARED_LIBS})
-		find_path(LIBUNWIND_INCLUDE_DIR libunwind.h PATH_SUFFIXES libunwind REQUIRED)
+
+		# ensure suffix and search paths for include and library search (specifically for multilib)
+		if(SENTRY_BUILD_FORCE32)
+			set(_unwind_triplet "i386-linux-gnu")
+			set(_i386_libpaths /usr/lib/i386-linux-gnu /lib/i386-linux-gnu /usr/lib32 /lib32)
+		else()
+			set(_unwind_triplet "${CMAKE_LIBRARY_ARCHITECTURE}")
+		endif()
+
+		find_path(LIBUNWIND_INCLUDE_DIR libunwind.h
+			PATH_SUFFIXES
+				${_unwind_triplet}
+				libunwind # some distros put the libunwind headers into a subdirectory
+			REQUIRED)
 		if(SENTRY_LIBUNWIND_SHARED)
-			find_library(LIBUNWIND_LIBRARIES unwind REQUIRED)
+			find_library(LIBUNWIND_LIBRARIES unwind PATH_SUFFIXES ${_unwind_triplet} REQUIRED)
 		else()
-			find_library(LIBUNWIND_LIBRARIES libunwind.a REQUIRED)
-			find_library(LZMA_LIBRARY lzma)
+			find_library(LIBUNWIND_LIBRARIES libunwind.a PATH_SUFFIXES ${_unwind_triplet} REQUIRED)
+			if(SENTRY_BUILD_FORCE32)
+				find_library(LZMA_LIBRARY lzma PATHS ${_i386_libpaths} NO_DEFAULT_PATH)
+			else()
+				find_library(LZMA_LIBRARY lzma PATH_SUFFIXES ${_unwind_triplet})
+			endif()
+
 			if(LZMA_LIBRARY)
 				list(APPEND LIBUNWIND_LIBRARIES ${LZMA_LIBRARY})
 			endif()
@@ -685,6 +697,15 @@ if(SENTRY_BACKEND_CRASHPAD)
 		$<BUILD_INTERFACE:crashpad::client>
 		$<INSTALL_INTERFACE:sentry_crashpad::client>
 	)
+
+	# ignore #include_next <signal.h> warning being a GCC extension via pedantic
+	if (LINUX)
+		target_include_directories(sentry
+				SYSTEM PRIVATE
+				external/crashpad/compat/linux
+		)
+	endif()
+
 	install(EXPORT crashpad_export NAMESPACE sentry_crashpad:: FILE sentry_crashpad-targets.cmake
 		DESTINATION "${CMAKE_INSTALL_CMAKEDIR}"
 	)
@@ -813,6 +834,21 @@ if(SENTRY_BUILD_EXAMPLES)
 		# Disable all optimizations for the `sentry_example` in gcc/clang. This allows us to keep crash triggers simple.
 		# The effects besides reproducible code-gen across compiler versions, will be negligible for build- and runtime.
 		target_compile_options(sentry_example PRIVATE $<BUILD_INTERFACE:-O0>)
+
+		# Most Linux distros will build executables with PIE. However, if we build a static library, not all static
+		# dependencies being packaged with the same distro, are built with PIC enabled. This will lead `ld` to error on
+		# relocation data, thus failing the build/tests. There is no need to build the example with PIE enabled.
+		# Similarly, LTO must be disabled since the static library archive might have different LTO bytecode version.
+		if (LINUX AND NOT SENTRY_BUILD_SHARED_LIBS)
+			target_link_options(sentry_example PRIVATE
+				-no-pie
+				-fno-lto
+			)
+			target_compile_options(sentry_example PRIVATE
+				-fno-pie
+				-fno-lto
+			)
+		endif()
 	endif()
 
 	# set static runtime if enabled
@@ -824,11 +860,17 @@ if(SENTRY_BUILD_EXAMPLES)
 		set_target_properties(sentry_example PROPERTIES FOLDER ${SENTRY_FOLDER})
 	endif()
 
+	if(CMAKE_GENERATOR STREQUAL "Xcode")
+		set(SENTRY_FIXTURE_OUTPUT_DIR "$<TARGET_BUNDLE_DIR:sentry_example>/..")
+	else()
+		set(SENTRY_FIXTURE_OUTPUT_DIR "$<TARGET_FILE_DIR:sentry_example>")
+	endif()
+
 	add_custom_command(TARGET sentry_example POST_BUILD
-		COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_CURRENT_SOURCE_DIR}/tests/fixtures/minidump.dmp" "$<TARGET_FILE_DIR:sentry_example>/minidump.dmp")
+		COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_CURRENT_SOURCE_DIR}/tests/fixtures/minidump.dmp" "${SENTRY_FIXTURE_OUTPUT_DIR}/minidump.dmp")
 
 	add_custom_command(TARGET sentry_example POST_BUILD
-		COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_CURRENT_SOURCE_DIR}/tests/fixtures/view-hierarchy.json" "$<TARGET_FILE_DIR:sentry_example>/view-hierarchy.json")
+		COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_CURRENT_SOURCE_DIR}/tests/fixtures/view-hierarchy.json" "${SENTRY_FIXTURE_OUTPUT_DIR}/view-hierarchy.json")
 endif()
 
 # Limit the exported symbols when sentry is built as a shared library to those with a "sentry_" prefix:
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 738d1c110..2aff0b054 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -153,6 +153,9 @@ elseif(SENTRY_BACKEND_NONE)
 endif()
 
 # unwinder
+sentry_target_sources_cwd(sentry
+	unwinder/sentry_unwinder.h
+)
 if(SENTRY_WITH_LIBBACKTRACE)
 	target_compile_definitions(sentry PRIVATE SENTRY_WITH_UNWINDER_LIBBACKTRACE)
 	sentry_target_sources_cwd(sentry
@@ -167,6 +170,13 @@ if(SENTRY_WITH_LIBUNWIND)
 	)
 endif()
 
+if(SENTRY_WITH_LIBUNWIND_MAC)
+	target_compile_definitions(sentry PRIVATE SENTRY_WITH_UNWINDER_LIBUNWIND_MAC)
+	sentry_target_sources_cwd(sentry
+			unwinder/sentry_unwinder_libunwind_mac.c
+	)
+endif()
+
 if(SENTRY_WITH_LIBUNWINDSTACK)
 	target_compile_definitions(sentry PRIVATE SENTRY_WITH_UNWINDER_LIBUNWINDSTACK)
 	sentry_target_sources_cwd(sentry
diff --git a/src/backends/sentry_backend_breakpad.cpp b/src/backends/sentry_backend_breakpad.cpp
index 1c737ce55..528a3c00a 100644
--- a/src/backends/sentry_backend_breakpad.cpp
+++ b/src/backends/sentry_backend_breakpad.cpp
@@ -261,22 +261,22 @@ breakpad_backend_startup(
         && defined(SENTRY_THREAD_STACK_GUARANTEE_AUTO_INIT)
     sentry__set_default_thread_stack_guarantee();
 #    endif
-    backend->data = new google_breakpad::ExceptionHandler(
+    backend->data = new (std::nothrow) google_breakpad::ExceptionHandler(
         current_run_folder->path_w, nullptr, breakpad_backend_callback, nullptr,
         google_breakpad::ExceptionHandler::HANDLER_EXCEPTION);
 #elif defined(SENTRY_PLATFORM_MACOS)
     // If process is being debugged and there are breakpoints set it will cause
     // task_set_exception_ports to crash the whole process and debugger
-    backend->data = new google_breakpad::ExceptionHandler(
-        current_run_folder->path, nullptr, breakpad_backend_callback, nullptr,
-        !IsDebuggerActive(), nullptr);
+    backend->data = new (std::nothrow)
+        google_breakpad::ExceptionHandler(current_run_folder->path, nullptr,
+            breakpad_backend_callback, nullptr, !IsDebuggerActive(), nullptr);
 #elif defined(SENTRY_PLATFORM_IOS)
-    backend->data
-        = new google_breakpad::ExceptionHandler(current_run_folder->path,
-            nullptr, breakpad_backend_callback, nullptr, true, nullptr);
+    backend->data = new (std::nothrow)
+        google_breakpad::ExceptionHandler(current_run_folder->path, nullptr,
+            breakpad_backend_callback, nullptr, true, nullptr);
 #else
     google_breakpad::MinidumpDescriptor descriptor(current_run_folder->path);
-    backend->data = new google_breakpad::ExceptionHandler(
+    backend->data = new (std::nothrow) google_breakpad::ExceptionHandler(
         descriptor, nullptr, breakpad_backend_callback, nullptr, true, -1);
 #endif
     return backend->data == nullptr;
diff --git a/src/backends/sentry_backend_crashpad.cpp b/src/backends/sentry_backend_crashpad.cpp
index 181e3d6c4..72de92078 100644
--- a/src/backends/sentry_backend_crashpad.cpp
+++ b/src/backends/sentry_backend_crashpad.cpp
@@ -32,8 +32,7 @@ extern "C" {
 #if defined(__GNUC__)
 #    pragma GCC diagnostic push
 #    pragma GCC diagnostic ignored "-Wunused-parameter"
-#    pragma GCC diagnostic ignored "-Wgnu-zero-variadic-macro-arguments"
-#    pragma GCC diagnostic ignored "-Wfour-char-constants"
+#    pragma GCC diagnostic ignored "-Wmultichar"
 #elif defined(_MSC_VER)
 #    pragma warning(push)
 #    pragma warning(disable : 4100) // unreferenced formal parameter
@@ -310,11 +309,11 @@ flush_scope_from_handler(
 
 #    ifdef SENTRY_PLATFORM_WINDOWS
 static bool
-sentry__crashpad_handler(EXCEPTION_POINTERS *ExceptionInfo)
+crashpad_handler(EXCEPTION_POINTERS *ExceptionInfo)
 {
 #    else
 static bool
-sentry__crashpad_handler(int signum, siginfo_t *info, ucontext_t *user_context)
+crashpad_handler(int signum, siginfo_t *info, ucontext_t *user_context)
 {
     sentry__page_allocator_enable();
     sentry__enter_signal_handler();
@@ -540,7 +539,7 @@ crashpad_backend_startup(
     // Initialize database first, flushing the consent later on as part of
     // `sentry_init` will persist the upload flag.
     data->db = crashpad::CrashReportDatabase::Initialize(database).release();
-    data->client = new crashpad::CrashpadClient;
+    data->client = new (std::nothrow) crashpad::CrashpadClient;
     char *minidump_url
         = sentry__dsn_get_minidump_url(options->dsn, options->user_agent);
     if (minidump_url) {
@@ -581,8 +580,7 @@ crashpad_backend_startup(
     }
 
 #if defined(SENTRY_PLATFORM_LINUX) || defined(SENTRY_PLATFORM_WINDOWS)
-    crashpad::CrashpadClient::SetFirstChanceExceptionHandler(
-        &sentry__crashpad_handler);
+    crashpad::CrashpadClient::SetFirstChanceExceptionHandler(&crashpad_handler);
 #endif
 #ifdef SENTRY_PLATFORM_LINUX
     // Crashpad was recently changed to register its own signal stack, which for
@@ -593,7 +591,7 @@ crashpad_backend_startup(
     if (g_signal_stack.ss_sp) {
         g_signal_stack.ss_size = SIGNAL_STACK_SIZE;
         g_signal_stack.ss_flags = 0;
-        sigaltstack(&g_signal_stack, 0);
+        sigaltstack(&g_signal_stack, nullptr);
     }
 #endif
 
@@ -632,9 +630,9 @@ crashpad_backend_shutdown(sentry_backend_t *backend)
 
 #ifdef SENTRY_PLATFORM_LINUX
     g_signal_stack.ss_flags = SS_DISABLE;
-    sigaltstack(&g_signal_stack, 0);
+    sigaltstack(&g_signal_stack, nullptr);
     sentry_free(g_signal_stack.ss_sp);
-    g_signal_stack.ss_sp = NULL;
+    g_signal_stack.ss_sp = nullptr;
 #endif
 }
 
@@ -745,8 +743,8 @@ crashpad_backend_prune_database(sentry_backend_t *backend)
     // large.
     data->db->CleanDatabase(60 * 60 * 24 * 2);
     crashpad::BinaryPruneCondition condition(crashpad::BinaryPruneCondition::OR,
-        new crashpad::DatabaseSizePruneCondition(1024 * 8),
-        new crashpad::AgePruneCondition(2));
+        new (std::nothrow) crashpad::DatabaseSizePruneCondition(1024 * 8),
+        new (std::nothrow) crashpad::AgePruneCondition(2));
     crashpad::PruneCrashReportDatabase(data->db, &condition);
 }
 
@@ -825,12 +823,11 @@ sentry__backend_new(void)
     }
     memset(backend, 0, sizeof(sentry_backend_t));
 
-    auto *data = SENTRY_MAKE(crashpad_state_t);
+    auto *data = new (std::nothrow) crashpad_state_t {};
     if (!data) {
         sentry_free(backend);
         return nullptr;
     }
-    memset(data, 0, sizeof(crashpad_state_t));
     data->scope_flush = false;
     data->crashed = false;
 
diff --git a/src/backends/sentry_backend_inproc.c b/src/backends/sentry_backend_inproc.c
index 8a8755a75..ec65cf0e1 100644
--- a/src/backends/sentry_backend_inproc.c
+++ b/src/backends/sentry_backend_inproc.c
@@ -4,6 +4,7 @@
 #include "sentry_alloc.h"
 #include "sentry_backend.h"
 #include "sentry_core.h"
+#include "sentry_cpu_relax.h"
 #include "sentry_database.h"
 #include "sentry_envelope.h"
 #include "sentry_logger.h"
@@ -18,13 +19,153 @@
 #include "sentry_transport.h"
 #include "sentry_unix_pageallocator.h"
 #include "transports/sentry_disk_transport.h"
+#include <errno.h>
+#include <limits.h>
 #include <string.h>
 
-#define SIGNAL_DEF(Sig, Desc) { Sig, #Sig, Desc }
+/**
+ * Inproc Backend Introduction
+ *
+ * As the name suggests the inproc backend runs the crash handling entirely
+ * inside the process and thus is the right choice for platforms that
+ * are limited in process creation/spawning/cloning (or even deploying a
+ * separate release artifact like with `crashpad`). It is also very lightweight
+ * in terms of toolchain dependencies because it does not require a C++ standard
+ * library.
+ *
+ * It targets UNIX and Windows (effectively supporting all target platforms of
+ * the Native SDK) and uses POSIX signal handling on UNIX and unhandled
+ * exception filters (UEF) on Windows. Whenever a signal handler is mentioned
+ * in the code or comments, one can replace that with UEF on Windows.
+ *
+ * In its current implementation it only gathers the crash context for the
+ * crashed thread and does not attempt to stop any other threads. While this
+ * can be considered a downside for some users, it allows additional handlers
+ * to process the crashed process again, which the other backends currenlty
+ * can't guarantee to work. Additional crash signals coming from other threads
+ * will be blocked indefinitely until previous handler takes over.
+ *
+ * The inproc backend splits the handler in two parts:
+ *   - a signal handler/unhandled exception filter that severely limits what we
+ *     can do, focusing on response to the OS mechanism and almost zero policy.
+ *   - a separate handler thread that does most of the typical sentry error
+ *     handling and policy implementation, with a bit more freedom.
+ *
+ * Only if the handler thread has crashed or is otherwise unavailable, will we
+ * execute the unsafe part inside the signal-handler itself, as a last chance
+ * fallback for report creation. The signal-handler part should not use any
+ * synchronization or signal-unsafe function from `libc` (see function-level
+ * comment), even access to options is ideally done before during
+ * initailization. If access to option or scope (or any other global context)
+ * is required this should happen in the handler thread.
+ *
+ * The handler thread is started during backend initialization and will be
+ * triggered by the signal handler via a POSIX pipe on which the handler thread
+ * blocks from the start (similarly on Windows, which uses a Semaphore). While
+ * the handler thread handles a crash, the signal handler (or UEF) blocks itself
+ * on an ACK pipe/semaphore. Once the handler thread is done processing the
+ * crash, it will unblock the signal handler which resets the synchronization
+ * during crash handling and invokes the handler chain.
+ *
+ * The most important functions and their meaning:
+ *
+ *  - `handle_signal`/`handle_exception`: top-level entry points called directly
+ *    from the operating system. They pack sentry_ucontext_t and call...
+ *  - `process_ucontext`: the actual signal-handler/UEF, primarily manages the
+ *    interaction with the OS and other handlers and calls..
+ *  - `dispatch_ucontext`: this is the place that decides on where to run the
+ *    sentry error event creation and that does the synchronization with...
+ *  - `handler_thread_main`: implements the handler thread loop, blocks until
+ *    unblocked by the signal handler and finally calls...
+ *  - `process_ucontext_deferred`: the implementation of sentry specific
+ *    handler policy leading to crash event construction, it defers to...
+ *  - `make_signal_event`: that is purely about making a crash event object and
+ *    filling the context data
+ *
+ * The `on_crash` and `before_send` hook usually run on the handler thread
+ * during `process_ucontext_deferred` but users cannot rely on any particular
+ * thread to call their callbacks. However, they can be sure that the crashed
+ * thread won't run during their the execution of their callback code.
+ *
+ * Note on unwinders:
+ *
+ * The backend relies on an unwinder that can backtrace from a user context.
+ * This is important because the unwinder usually runs in the context of the
+ * handler thread, where a direct backtrace makes no longer any sense (even if
+ * it was signal safe). We do not dispatch to the handler thread for targets
+ * that still use `libbacktrace`, and instead run the unsafe part directly in
+ * the signal handler. This is primarily to not break these target, but in
+ * general the `libbacktrace`-based unwinder should be considered deprecated.
+ *
+ * Notes on signal handling in other runtimes:
+ *
+ * The .net runtimes currently rely on signal handling to deliver managed
+ * exceptions caused from the generated native code. Due to the initialization
+ * order the inproc backend will receive those signals which it should not
+ * process. On setups like these it offers a handler strategy that chains the
+ * previous signal handler first, allowing the .net runtime handler to either
+ * immediately jump back into runtime code or reset IP/SP so that the returning
+ * signal handler continues from the managed exception rather then the crashed
+ * instruction.
+ *
+ * The Android runtime (ART) otoh, while also relying heavily on signal handling
+ * to communicate between generated code and the garbage collector entirely
+ * shields the signals from us (via `libsigchain` special handler ordering) and
+ * only forwards signals that are not relevant to runtime. However, it relies on
+ * each thread having a specific sigaltstack setup, which can lead to crashes if
+ * overriden. For this reason, we do not set the sigaltstack of any thread if
+ * one was already configured even if the size is smaller than we'd want. Since
+ * most of the handler runs in a separate thread the size limitation of any pre-
+ * configured `sigaltstack` is not a problem to our more complex handler code.
+ */
 
+#define SIGNAL_DEF(Sig, Desc) { Sig, #Sig, Desc }
 #define MAX_FRAMES 128
 
+// the data exchange between the signal handler and the handler thread
+typedef struct sentry_inproc_handler_state_s {
+    sentry_ucontext_t uctx;
 #ifdef SENTRY_PLATFORM_UNIX
+    siginfo_t siginfo_storage;
+    ucontext_t user_context_storage;
+#endif
+    const struct signal_slot *sig_slot;
+} sentry_inproc_handler_state_t;
+
+// "data" struct containing options to prevent mutex access in signal handler
+typedef struct sentry_inproc_backend_config_s {
+    bool enable_logging_when_crashed;
+    sentry_handler_strategy_t handler_strategy;
+} sentry_inproc_backend_config_t;
+
+// global instance for data-exchange between signal handler and handler thread
+static sentry_inproc_handler_state_t g_handler_state;
+// global instance for backend configuration state
+static sentry_inproc_backend_config_t g_backend_config;
+
+// handler thread state and synchronization variables
+static sentry_threadid_t g_handler_thread;
+// true once the handler thread starts waiting
+static volatile long g_handler_thread_ready = 0;
+// shutdown loop invariant
+static volatile long g_handler_should_exit = 0;
+// signal handler tells handler thread to start working
+static volatile long g_handler_has_work = 0;
+
+// trigger/schedule primitives that block the other side until this side is done
+#ifdef SENTRY_PLATFORM_UNIX
+static int g_handler_pipe[2] = { -1, -1 };
+static int g_handler_ack_pipe[2] = { -1, -1 };
+#elif defined(SENTRY_PLATFORM_WINDOWS)
+static HANDLE g_handler_semaphore = NULL;
+static HANDLE g_handler_ack_semaphore = NULL;
+#endif
+
+static int start_handler_thread(void);
+static void stop_handler_thread(void);
+
+#ifdef SENTRY_PLATFORM_UNIX
+#    include <unistd.h>
 struct signal_slot {
     int signum;
     const char *signame;
@@ -77,8 +218,24 @@ invoke_signal_handler(int signum, siginfo_t *info, void *user_context)
 
 static int
 startup_inproc_backend(
-    sentry_backend_t *UNUSED(backend), const sentry_options_t *UNUSED(options))
+    sentry_backend_t *backend, const sentry_options_t *options)
 {
+    // get option state so we don't need to sync read during signal handling
+    g_backend_config.enable_logging_when_crashed
+        = options ? options->enable_logging_when_crashed : true;
+    g_backend_config.handler_strategy =
+#    if defined(SENTRY_PLATFORM_LINUX)
+        options ? sentry_options_get_handler_strategy(options) :
+#    endif
+                SENTRY_HANDLER_STRATEGY_DEFAULT;
+    if (backend) {
+        backend->data = &g_backend_config;
+    }
+
+    if (start_handler_thread() != 0) {
+        return 1;
+    }
+
     // save the old signal handlers
     memset(g_previous_handlers, 0, sizeof(g_previous_handlers));
     for (size_t i = 0; i < SIGNAL_COUNT; ++i) {
@@ -119,8 +276,10 @@ startup_inproc_backend(
 }
 
 static void
-shutdown_inproc_backend(sentry_backend_t *UNUSED(backend))
+shutdown_inproc_backend(sentry_backend_t *backend)
 {
+    stop_handler_thread();
+
     if (g_signal_stack.ss_sp) {
         g_signal_stack.ss_flags = SS_DISABLE;
         sigaltstack(&g_signal_stack, 0);
@@ -128,6 +287,9 @@ shutdown_inproc_backend(sentry_backend_t *UNUSED(backend))
         g_signal_stack.ss_sp = NULL;
     }
     reset_signal_handlers();
+    if (backend) {
+        backend->data = NULL;
+    }
 }
 
 #elif defined(SENTRY_PLATFORM_WINDOWS)
@@ -169,25 +331,40 @@ static LONG WINAPI handle_exception(EXCEPTION_POINTERS *);
 
 static int
 startup_inproc_backend(
-    sentry_backend_t *UNUSED(backend), const sentry_options_t *UNUSED(options))
+    sentry_backend_t *backend, const sentry_options_t *options)
 {
+    g_backend_config.enable_logging_when_crashed
+        = options ? options->enable_logging_when_crashed : true;
+    g_backend_config.handler_strategy = SENTRY_HANDLER_STRATEGY_DEFAULT;
+    if (backend) {
+        backend->data = &g_backend_config;
+    }
+
 #    if !defined(SENTRY_BUILD_SHARED)                                          \
         && defined(SENTRY_THREAD_STACK_GUARANTEE_AUTO_INIT)
     sentry__set_default_thread_stack_guarantee();
 #    endif
+    if (start_handler_thread() != 0) {
+        return 1;
+    }
     g_previous_handler = SetUnhandledExceptionFilter(&handle_exception);
     SetErrorMode(SEM_FAILCRITICALERRORS);
     return 0;
 }
 
 static void
-shutdown_inproc_backend(sentry_backend_t *UNUSED(backend))
+shutdown_inproc_backend(sentry_backend_t *backend)
 {
+    stop_handler_thread();
+
     LPTOP_LEVEL_EXCEPTION_FILTER current_handler
         = SetUnhandledExceptionFilter(g_previous_handler);
     if (current_handler != &handle_exception) {
         SetUnhandledExceptionFilter(current_handler);
     }
+    if (backend) {
+        backend->data = NULL;
+    }
 }
 
 #endif
@@ -515,10 +692,11 @@ make_signal_event(const struct signal_slot *sig_slot,
     if (sig_slot) {
         sentry_value_set_by_key(
             signal_meta, "name", sentry_value_new_string(sig_slot->signame));
-        // at least on windows, the signum is a true u32 which we can't
-        // otherwise represent.
-        sentry_value_set_by_key(signal_meta, "number",
-            sentry_value_new_double((double)sig_slot->signum));
+        // relay interprets the signal number as an i64:
+        // https://github.com/getsentry/relay/blob/e96e4b037cfddaa7b0fb97a0909d100dde034f8e/relay-event-schema/src/protocol/mechanism.rs#L52-L53
+        // This covers the signal number ranges of all supported platforms.
+        sentry_value_set_by_key(
+            signal_meta, "number", sentry_value_new_int64(sig_slot->signum));
     }
     sentry_value_set_by_key(mechanism_meta, "signal", signal_meta);
     sentry_value_set_by_key(
@@ -563,97 +741,32 @@ make_signal_event(const struct signal_slot *sig_slot,
     return event;
 }
 
+/**
+ * This is the signal-unsafe part of the inproc handler. Everything that
+ * requires stdio, time-formatting/-capture or serialization must happen here.
+ *
+ * Although we can use signal-unsafe functions here, this should still be
+ * written with care. Don't rely on thread synchronization or the system
+ * allocator since the program is in a crashed state. At least one thread no
+ * longer progresses and memory can be corrupted.
+ */
 static void
-handle_ucontext(const sentry_ucontext_t *uctx)
+process_ucontext_deferred(
+    const sentry_ucontext_t *uctx, const struct signal_slot *sig_slot)
 {
-    // Disable logging during crash handling if the option is set
-    SENTRY_WITH_OPTIONS (options) {
-        if (!options->enable_logging_when_crashed) {
-            sentry__logger_disable();
-        }
-    }
-
     SENTRY_INFO("entering signal handler");
 
-    sentry_handler_strategy_t strategy = SENTRY_HANDLER_STRATEGY_DEFAULT;
-#ifdef SENTRY_PLATFORM_UNIX
-    // inform the sentry_sync system that we're in a signal handler.  This will
-    // make mutexes spin on a spinlock instead as it's no longer safe to use a
-    // pthread mutex.
-    sentry__enter_signal_handler();
-#endif
-
     SENTRY_WITH_OPTIONS (options) {
-#ifdef SENTRY_PLATFORM_LINUX
-        // On Linux (and thus Android) CLR/Mono converts signals provoked by
-        // AOT/JIT-generated native code into managed code exceptions. In these
-        // cases, we shouldn't react to the signal at all and let their handler
-        // discontinue the signal chain by invoking the runtime handler before
-        // we process the signal.
-        strategy = sentry_options_get_handler_strategy(options);
-        if (strategy == SENTRY_HANDLER_STRATEGY_CHAIN_AT_START) {
-            SENTRY_DEBUG("defer to runtime signal handler at start");
-            // there is a good chance that we won't return from the previous
-            // handler and that would mean we couldn't enter this handler with
-            // the next signal coming in if we didn't "leave" here.
-            sentry__leave_signal_handler();
-            if (!options->enable_logging_when_crashed) {
-                sentry__logger_enable();
-            }
-
-            uintptr_t ip = get_instruction_pointer(uctx);
-            uintptr_t sp = get_stack_pointer(uctx);
-
-            // invoke the previous handler (typically the CLR/Mono
-            // signal-to-managed-exception handler)
-            invoke_signal_handler(
-                uctx->signum, uctx->siginfo, (void *)uctx->user_context);
-
-            // If the execution returns here in AOT mode, and the instruction
-            // or stack pointer were changed, it means CLR/Mono converted the
-            // signal into a managed exception and transferred execution to a
-            // managed exception handler.
-            // https://github.com/dotnet/runtime/blob/6d96e28597e7da0d790d495ba834cc4908e442cd/src/mono/mono/mini/exceptions-arm64.c#L538
-            if (ip != get_instruction_pointer(uctx)
-                || sp != get_stack_pointer(uctx)) {
-                SENTRY_DEBUG("runtime converted the signal to a managed "
-                             "exception, we do not handle the signal");
-                return;
-            }
-
-            // let's re-enter because it means this was an actual native crash
-            if (!options->enable_logging_when_crashed) {
-                sentry__logger_disable();
-            }
-            sentry__enter_signal_handler();
-            SENTRY_DEBUG(
-                "return from runtime signal handler, we handle the signal");
-        }
-#endif
-
-        const struct signal_slot *sig_slot = NULL;
-        for (int i = 0; i < SIGNAL_COUNT; ++i) {
-#ifdef SENTRY_PLATFORM_UNIX
-            if (SIGNAL_DEFINITIONS[i].signum == uctx->signum) {
-#elif defined SENTRY_PLATFORM_WINDOWS
-            if (SIGNAL_DEFINITIONS[i].signum
-                == uctx->exception_ptrs.ExceptionRecord->ExceptionCode) {
-#else
-#    error Unsupported platform
-#endif
-                sig_slot = &SIGNAL_DEFINITIONS[i];
-            }
-        }
-
-#ifdef SENTRY_PLATFORM_UNIX
-        // use a signal-safe allocator before we tear down.
-        sentry__page_allocator_enable();
-#endif
         // Flush logs in a crash-safe manner before crash handling
         if (options->enable_logs) {
             sentry__logs_flush_crash_safe();
         }
 
+        sentry_handler_strategy_t strategy =
+#if defined(SENTRY_PLATFORM_LINUX)
+            options ? sentry_options_get_handler_strategy(options) :
+#endif
+                    SENTRY_HANDLER_STRATEGY_DEFAULT;
         sentry_value_t event = make_signal_event(sig_slot, uctx, strategy);
         bool should_handle = true;
         sentry__write_crash_marker(options);
@@ -699,9 +812,384 @@ handle_ucontext(const sentry_ucontext_t *uctx)
 
         // after capturing the crash event, dump all the envelopes to disk
         sentry__transport_dump_queue(options->transport, options->run);
+
+        SENTRY_INFO("crash has been captured");
+    }
+}
+
+SENTRY_THREAD_FN
+handler_thread_main(void *UNUSED(data))
+{
+    sentry__atomic_store(&g_handler_thread_ready, 1);
+
+    while (!sentry__atomic_fetch(&g_handler_should_exit)) {
+#ifdef SENTRY_PLATFORM_UNIX
+        char command = 0;
+        ssize_t rv = read(g_handler_pipe[0], &command, 1);
+        if (rv == -1 && errno == EINTR) {
+            continue;
+        }
+        if (rv <= 0) {
+            if (sentry__atomic_fetch(&g_handler_should_exit)) {
+                break;
+            }
+            continue;
+        }
+#elif defined(SENTRY_PLATFORM_WINDOWS)
+        DWORD wait_result = WaitForSingleObject(g_handler_semaphore, INFINITE);
+        if (wait_result != WAIT_OBJECT_0) {
+            continue;
+        }
+        if (sentry__atomic_fetch(&g_handler_should_exit)) {
+            break;
+        }
+#endif
+
+        if (!sentry__atomic_fetch(&g_handler_has_work)) {
+            continue;
+        }
+
+        process_ucontext_deferred(
+            &g_handler_state.uctx, g_handler_state.sig_slot);
+        sentry__atomic_store(&g_handler_has_work, 0);
+#ifdef SENTRY_PLATFORM_UNIX
+        if (g_handler_ack_pipe[1] >= 0) {
+            char c = 1;
+            ssize_t rv;
+            do {
+                rv = write(g_handler_ack_pipe[1], &c, 1);
+            } while (rv == -1 && errno == EINTR);
+        }
+#elif defined(SENTRY_PLATFORM_WINDOWS)
+        if (g_handler_ack_semaphore) {
+            ReleaseSemaphore(g_handler_ack_semaphore, 1, NULL);
+        }
+#endif
+    }
+
+#ifdef SENTRY_PLATFORM_WINDOWS
+    return 0;
+#else
+    return NULL;
+#endif
+}
+
+static int
+start_handler_thread(void)
+{
+    if (sentry__atomic_fetch(&g_handler_thread_ready)) {
+        return 0;
+    }
+
+    sentry__thread_init(&g_handler_thread);
+    sentry__atomic_store(&g_handler_should_exit, 0);
+    sentry__atomic_store(&g_handler_has_work, 0);
+
+#ifdef SENTRY_PLATFORM_UNIX
+    if (pipe(g_handler_pipe) != 0) {
+        SENTRY_WARNF("failed to create handler pipe: %s", strerror(errno));
+        return 1;
+    }
+    if (pipe(g_handler_ack_pipe) != 0) {
+        SENTRY_WARNF("failed to create handler ack pipe: %s", strerror(errno));
+        close(g_handler_pipe[0]);
+        close(g_handler_pipe[1]);
+        g_handler_pipe[0] = -1;
+        g_handler_pipe[1] = -1;
+        return 1;
+    }
+#elif defined(SENTRY_PLATFORM_WINDOWS)
+    g_handler_semaphore = CreateSemaphoreW(NULL, 0, LONG_MAX, NULL);
+    if (!g_handler_semaphore) {
+        SENTRY_WARN("failed to create handler semaphore");
+        return 1;
+    }
+    g_handler_ack_semaphore = CreateSemaphoreW(NULL, 0, LONG_MAX, NULL);
+    if (!g_handler_ack_semaphore) {
+        SENTRY_WARN("failed to create handler ack semaphore");
+        CloseHandle(g_handler_semaphore);
+        g_handler_semaphore = NULL;
+        return 1;
+    }
+#endif
+
+    if (sentry__thread_spawn(&g_handler_thread, handler_thread_main, NULL)
+        != 0) {
+        SENTRY_WARN("failed to spawn handler thread");
+#ifdef SENTRY_PLATFORM_UNIX
+        close(g_handler_pipe[0]);
+        close(g_handler_pipe[1]);
+        g_handler_pipe[0] = -1;
+        g_handler_pipe[1] = -1;
+        close(g_handler_ack_pipe[0]);
+        close(g_handler_ack_pipe[1]);
+        g_handler_ack_pipe[0] = -1;
+        g_handler_ack_pipe[1] = -1;
+#elif defined(SENTRY_PLATFORM_WINDOWS)
+        CloseHandle(g_handler_semaphore);
+        g_handler_semaphore = NULL;
+        CloseHandle(g_handler_ack_semaphore);
+        g_handler_ack_semaphore = NULL;
+#endif
+        return 1;
+    }
+
+    return 0;
+}
+
+static void
+stop_handler_thread(void)
+{
+    if (!sentry__atomic_fetch(&g_handler_thread_ready)) {
+        return;
+    }
+
+    sentry__atomic_store(&g_handler_should_exit, 1);
+
+#ifdef SENTRY_PLATFORM_UNIX
+    if (g_handler_pipe[1] >= 0) {
+        char c = 0;
+        ssize_t rv;
+        do {
+            rv = write(g_handler_pipe[1], &c, 1);
+        } while (rv == -1 && errno == EINTR);
     }
+#elif defined(SENTRY_PLATFORM_WINDOWS)
+    if (g_handler_semaphore) {
+        ReleaseSemaphore(g_handler_semaphore, 1, NULL);
+    }
+#endif
 
-    SENTRY_INFO("crash has been captured");
+    sentry__thread_join(g_handler_thread);
+    sentry__thread_free(&g_handler_thread);
+
+#ifdef SENTRY_PLATFORM_UNIX
+    if (g_handler_pipe[0] >= 0) {
+        close(g_handler_pipe[0]);
+        g_handler_pipe[0] = -1;
+    }
+    if (g_handler_pipe[1] >= 0) {
+        close(g_handler_pipe[1]);
+        g_handler_pipe[1] = -1;
+    }
+    if (g_handler_ack_pipe[0] >= 0) {
+        close(g_handler_ack_pipe[0]);
+        g_handler_ack_pipe[0] = -1;
+    }
+    if (g_handler_ack_pipe[1] >= 0) {
+        close(g_handler_ack_pipe[1]);
+        g_handler_ack_pipe[1] = -1;
+    }
+#elif defined(SENTRY_PLATFORM_WINDOWS)
+    if (g_handler_semaphore) {
+        CloseHandle(g_handler_semaphore);
+        g_handler_semaphore = NULL;
+    }
+    if (g_handler_ack_semaphore) {
+        CloseHandle(g_handler_ack_semaphore);
+        g_handler_ack_semaphore = NULL;
+    }
+#endif
+
+    sentry__atomic_store(&g_handler_thread_ready, 0);
+    sentry__atomic_store(&g_handler_should_exit, 0);
+}
+
+static bool
+has_handler_thread_crashed(void)
+{
+    const sentry_threadid_t current_thread = sentry__current_thread();
+    if (sentry__atomic_fetch(&g_handler_thread_ready)
+        && sentry__threadid_equal(current_thread, g_handler_thread)) {
+#ifdef SENTRY_PLATFORM_UNIX
+        static const char msg[] = "[sentry] FATAL crash in handler thread, "
+                                  "falling back to previous handler\n";
+        const ssize_t rv = write(STDERR_FILENO, msg, sizeof(msg) - 1);
+        (void)rv;
+#else
+        static const char msg[] = "[sentry] FATAL crash in handler thread, "
+                                  "UEF continues search\n";
+        OutputDebugStringA(msg);
+        HANDLE stderr_handle = GetStdHandle(STD_ERROR_HANDLE);
+        if (stderr_handle && stderr_handle != INVALID_HANDLE_VALUE) {
+            DWORD written;
+            WriteFile(stderr_handle, msg, (DWORD)strlen(msg), &written, NULL);
+        }
+#endif
+        return true;
+    }
+    return false;
+}
+
+static void
+dispatch_ucontext(
+    const sentry_ucontext_t *uctx, const struct signal_slot *sig_slot)
+{
+#ifdef SENTRY_WITH_UNWINDER_LIBBACKTRACE
+    // For targets that still use `backtrace()` as the sole unwinder we must
+    // run the signal-unsafe part in the signal handler like we did before.
+    process_ucontext_deferred(uctx, sig_slot);
+    return;
+#else
+    if (!sentry__atomic_fetch(&g_handler_thread_ready)
+        || has_handler_thread_crashed()) {
+        // directly execute unsafe part in signal handler as a last chance to
+        // report an error when the handler thread is unavailable.
+        process_ucontext_deferred(uctx, sig_slot);
+        return;
+    }
+
+    g_handler_state.uctx = *uctx;
+    g_handler_state.sig_slot = sig_slot;
+
+#    ifdef SENTRY_PLATFORM_UNIX
+    if (uctx->siginfo) {
+        memcpy(&g_handler_state.siginfo_storage, uctx->siginfo,
+            sizeof(g_handler_state.siginfo_storage));
+        g_handler_state.uctx.siginfo = &g_handler_state.siginfo_storage;
+    } else {
+        g_handler_state.uctx.siginfo = NULL;
+    }
+
+    if (uctx->user_context) {
+        memcpy(&g_handler_state.user_context_storage, uctx->user_context,
+            sizeof(g_handler_state.user_context_storage));
+        g_handler_state.uctx.user_context
+            = &g_handler_state.user_context_storage;
+    } else {
+        g_handler_state.uctx.user_context = NULL;
+    }
+
+    // we leave the handler
+    sentry__leave_signal_handler();
+#    endif
+
+    sentry__atomic_store(&g_handler_has_work, 1);
+
+    // signal the handler thread to start working
+#    ifdef SENTRY_PLATFORM_UNIX
+    if (g_handler_pipe[1] >= 0) {
+        char c = 1;
+        ssize_t rv;
+        do {
+            rv = write(g_handler_pipe[1], &c, 1);
+        } while (rv == -1 && errno == EINTR);
+    }
+#    elif defined(SENTRY_PLATFORM_WINDOWS)
+    if (g_handler_semaphore) {
+        ReleaseSemaphore(g_handler_semaphore, 1, NULL);
+    }
+#    endif
+
+    // wait until the handler has done its work
+#    ifdef SENTRY_PLATFORM_UNIX
+    if (g_handler_ack_pipe[0] >= 0) {
+        char c = 0;
+        ssize_t rv;
+        do {
+            rv = read(g_handler_ack_pipe[0], &c, 1);
+        } while (rv == -1 && errno == EINTR);
+    }
+#    elif defined(SENTRY_PLATFORM_WINDOWS)
+    if (g_handler_ack_semaphore) {
+        WaitForSingleObject(g_handler_ack_semaphore, INFINITE);
+    }
+#    endif
+
+#    ifdef SENTRY_PLATFORM_UNIX
+    sentry__enter_signal_handler();
+#    endif
+
+#endif
+}
+
+/**
+ * This is the signal-safe part of the inproc handler. Everything in here should
+ * not defer to more than the set of functions listed in:
+ * https://www.man7.org/linux/man-pages/man7/signal-safety.7.html
+ * Since this function runs as an UnhandledExceptionFilter on Windows, the rules
+ * are less strict, but similar in nature.
+ *
+ * That means:
+ *  - no heap allocations except for sentry_malloc() (page allocator enabled!!!)
+ *  - no stdio or any kind of libc string formatting
+ *  - no logging (at least not with the printf-based default logger)
+ *  - no thread synchronization (SENTRY_WITH_OPTIONS will terminate with a log)
+ *  - in particular, don't access sentry interfaces that could request
+ *    access to options or the scope, those should go to the handler thread
+ *  - sentry_value_* and sentry_malloc are generally fine, because we use a safe
+ *    allocator, but keep in mind that some constructors create timestampy and
+ *    similar stringy and thus formatted values (and those are forbidden here).
+ *
+ *  If you are unsure about a particular function on a given target platform
+ *  please consult the signal-safety man page.
+ *
+ *  Another decision marker of whether code should go in here: do you must run
+ *  on the preempted crashed thread? Do you need to run before anything else?
+ */
+static void
+process_ucontext(const sentry_ucontext_t *uctx)
+{
+#ifdef SENTRY_PLATFORM_LINUX
+    if (g_backend_config.handler_strategy
+        == SENTRY_HANDLER_STRATEGY_CHAIN_AT_START) {
+        // On Linux (and thus Android) CLR/Mono converts signals provoked by
+        // AOT/JIT-generated native code into managed code exceptions. In these
+        // cases, we shouldn't react to the signal at all and let their handler
+        // discontinue the signal chain by invoking the runtime handler before
+        // we process the signal.
+        uintptr_t ip = get_instruction_pointer(uctx);
+        uintptr_t sp = get_stack_pointer(uctx);
+
+        // invoke the previous handler (typically the CLR/Mono
+        // signal-to-managed-exception handler)
+        invoke_signal_handler(
+            uctx->signum, uctx->siginfo, (void *)uctx->user_context);
+
+        // If the execution returns here in AOT mode, and the instruction
+        // or stack pointer were changed, it means CLR/Mono converted the
+        // signal into a managed exception and transferred execution to a
+        // managed exception handler.
+        // https://github.com/dotnet/runtime/blob/6d96e28597e7da0d790d495ba834cc4908e442cd/src/mono/mono/mini/exceptions-arm64.c#L538
+        if (ip != get_instruction_pointer(uctx)
+            || sp != get_stack_pointer(uctx)) {
+            return;
+        }
+
+        // return from runtime handler; continue processing the crash on the
+        // signal thread until the worker takes over
+    }
+#endif
+
+#ifdef SENTRY_PLATFORM_UNIX
+    sentry__enter_signal_handler();
+#endif
+
+    if (!g_backend_config.enable_logging_when_crashed) {
+        sentry__logger_disable();
+    }
+
+    const struct signal_slot *sig_slot = NULL;
+    for (int i = 0; i < SIGNAL_COUNT; ++i) {
+#ifdef SENTRY_PLATFORM_UNIX
+        if (SIGNAL_DEFINITIONS[i].signum == uctx->signum) {
+#elif defined SENTRY_PLATFORM_WINDOWS
+        if (SIGNAL_DEFINITIONS[i].signum
+            == uctx->exception_ptrs.ExceptionRecord->ExceptionCode) {
+#else
+#    error Unsupported platform
+#endif
+            sig_slot = &SIGNAL_DEFINITIONS[i];
+        }
+    }
+
+#ifdef SENTRY_PLATFORM_UNIX
+    // use a signal-safe allocator before we tear down.
+    sentry__page_allocator_enable();
+#endif
+
+    // invoke the handler thread for signal unsafe actions
+    dispatch_ucontext(uctx, sig_slot);
 
 #ifdef SENTRY_PLATFORM_UNIX
     // reset signal handlers and invoke the original ones.  This will then tear
@@ -710,7 +1198,8 @@ handle_ucontext(const sentry_ucontext_t *uctx)
     // forward as we're not restoring the page allocator.
     reset_signal_handlers();
     sentry__leave_signal_handler();
-    if (strategy != SENTRY_HANDLER_STRATEGY_CHAIN_AT_START) {
+    if (g_backend_config.handler_strategy
+        != SENTRY_HANDLER_STRATEGY_CHAIN_AT_START) {
         invoke_signal_handler(
             uctx->signum, uctx->siginfo, (void *)uctx->user_context);
     }
@@ -725,7 +1214,7 @@ handle_signal(int signum, siginfo_t *info, void *user_context)
     uctx.signum = signum;
     uctx.siginfo = info;
     uctx.user_context = (ucontext_t *)user_context;
-    handle_ucontext(&uctx);
+    process_ucontext(&uctx);
 }
 #elif defined SENTRY_PLATFORM_WINDOWS
 static LONG WINAPI
@@ -737,10 +1226,9 @@ handle_exception(EXCEPTION_POINTERS *ExceptionInfo)
         return EXCEPTION_CONTINUE_SEARCH;
     }
 
-    sentry_ucontext_t uctx;
-    memset(&uctx, 0, sizeof(uctx));
+    sentry_ucontext_t uctx = { 0 };
     uctx.exception_ptrs = *ExceptionInfo;
-    handle_ucontext(&uctx);
+    process_ucontext(&uctx);
     return EXCEPTION_CONTINUE_SEARCH;
 }
 #endif
@@ -748,7 +1236,7 @@ handle_exception(EXCEPTION_POINTERS *ExceptionInfo)
 static void
 handle_except(sentry_backend_t *UNUSED(backend), const sentry_ucontext_t *uctx)
 {
-    handle_ucontext(uctx);
+    process_ucontext(uctx);
 }
 
 sentry_backend_t *
diff --git a/src/sentry_sync.c b/src/sentry_sync.c
index 7766a6970..299e444a7 100644
--- a/src/sentry_sync.c
+++ b/src/sentry_sync.c
@@ -505,34 +505,64 @@ sentry__bgworker_get_thread_name(sentry_bgworker_t *bgw)
 
 #if defined(SENTRY_PLATFORM_UNIX) || defined(SENTRY_PLATFORM_NX)
 #    include "sentry_cpu_relax.h"
+#    include <unistd.h>
 
-static sig_atomic_t g_in_signal_handler = 0;
 static sentry_threadid_t g_signal_handling_thread = { 0 };
+static sig_atomic_t g_in_signal_handler __attribute__((aligned(64))) = 0;
 
 bool
 sentry__block_for_signal_handler(void)
 {
-    while (__sync_fetch_and_add(&g_in_signal_handler, 0)) {
-        if (sentry__threadid_equal(
-                sentry__current_thread(), g_signal_handling_thread)) {
+    for (;;) {
+        // if there is no signal handler active, we don't need to block
+        // we can spin cheaply, but for the return we must acquire
+        if (!__atomic_load_n(&g_in_signal_handler, __ATOMIC_RELAXED)) {
+            if (__atomic_load_n(&g_in_signal_handler, __ATOMIC_ACQUIRE) == 0) {
+                return true;
+            }
+        }
+
+        // if we are on the signal handler thread we can also leave
+        if (sentry__threadid_equal(sentry__current_thread(),
+                __atomic_load_n(&g_signal_handling_thread, __ATOMIC_ACQUIRE))) {
             return false;
         }
+
+        // otherwise, we spin
         sentry__cpu_relax();
     }
-    return true;
 }
 
 void
 sentry__enter_signal_handler(void)
 {
-    sentry__block_for_signal_handler();
-    g_signal_handling_thread = sentry__current_thread();
-    __sync_fetch_and_or(&g_in_signal_handler, 1);
+    for (;;) {
+        // entering a signal handler while another runs, should block us
+        while (__atomic_load_n(&g_in_signal_handler, __ATOMIC_RELAXED)) {
+            sentry__cpu_relax();
+        }
+
+        // atomically try to take ownership
+        if (__atomic_exchange_n(&g_in_signal_handler, 1, __ATOMIC_ACQ_REL)
+            == 0) {
+            // once we have, publish the handling thread too
+            sentry_threadid_t me = sentry__current_thread();
+            __atomic_store_n(&g_signal_handling_thread, me, __ATOMIC_RELEASE);
+            return;
+        }
+
+        // otherwise we've been raced, spin
+    }
 }
 
 void
 sentry__leave_signal_handler(void)
 {
-    __sync_fetch_and_and(&g_in_signal_handler, 0);
+    // reset handling thread
+    __atomic_store_n(
+        &g_signal_handling_thread, (sentry_threadid_t) { 0 }, __ATOMIC_RELAXED);
+
+    // reset handler flag
+    __atomic_store_n(&g_in_signal_handler, 0, __ATOMIC_RELEASE);
 }
 #endif
diff --git a/src/sentry_sync.h b/src/sentry_sync.h
index 5516443b9..964822e6f 100644
--- a/src/sentry_sync.h
+++ b/src/sentry_sync.h
@@ -207,6 +207,18 @@ typedef CONDITION_VARIABLE sentry_cond_t;
 #    define sentry__cond_wait(CondVar, Lock)                                   \
         sentry__cond_wait_timeout(CondVar, Lock, INFINITE)
 
+static inline sentry_threadid_t
+sentry__current_thread(void)
+{
+    return GetCurrentThread();
+}
+
+static inline int
+sentry__threadid_equal(sentry_threadid_t a, sentry_threadid_t b)
+{
+    return GetThreadId(a) == GetThreadId(b);
+}
+
 #else
 #    include <errno.h>
 #    include <pthread.h>
diff --git a/src/sentry_unix_pageallocator.c b/src/sentry_unix_pageallocator.c
index 0226c9b09..7c0e4ec40 100644
--- a/src/sentry_unix_pageallocator.c
+++ b/src/sentry_unix_pageallocator.c
@@ -33,20 +33,26 @@ static sentry_spinlock_t g_lock = SENTRY__SPINLOCK_INIT;
 bool
 sentry__page_allocator_enabled(void)
 {
-    return !!g_alloc;
+    return __atomic_load_n(&g_alloc, __ATOMIC_ACQUIRE) != NULL;
 }
 
 void
 sentry__page_allocator_enable(void)
 {
+    if (sentry__page_allocator_enabled()) {
+        return;
+    }
     sentry__spinlock_lock(&g_lock);
-    if (!g_alloc) {
-        g_alloc = &g_page_allocator_backing;
-        g_alloc->page_size = getpagesize();
-        g_alloc->last_page = NULL;
-        g_alloc->current_page = NULL;
-        g_alloc->page_offset = 0;
-        g_alloc->pages_allocated = 0;
+    if (__atomic_load_n(&g_alloc, __ATOMIC_RELAXED) == NULL) {
+        struct page_allocator_s *p = &g_page_allocator_backing;
+
+        p->page_size = getpagesize();
+        p->last_page = NULL;
+        p->current_page = NULL;
+        p->page_offset = 0;
+        p->pages_allocated = 0;
+
+        __atomic_store_n(&g_alloc, p, __ATOMIC_RELEASE);
     }
     sentry__spinlock_unlock(&g_lock);
 }
diff --git a/src/sentry_unix_spinlock.h b/src/sentry_unix_spinlock.h
index 575f86d67..b0b9f2b46 100644
--- a/src/sentry_unix_spinlock.h
+++ b/src/sentry_unix_spinlock.h
@@ -14,9 +14,15 @@ typedef volatile sig_atomic_t sentry_spinlock_t;
 
 #define SENTRY__SPINLOCK_INIT 0
 #define sentry__spinlock_lock(spinlock_ref)                                    \
-    while (!__sync_bool_compare_and_swap(spinlock_ref, 0, 1)) {                \
-        sentry__cpu_relax();                                                   \
+    for (;;) {                                                                 \
+        while (__atomic_load_n(spinlock_ref, __ATOMIC_RELAXED)) {              \
+            sentry__cpu_relax();                                               \
+        }                                                                      \
+        if (__atomic_exchange_n(spinlock_ref, 1, __ATOMIC_ACQUIRE) == 0) {     \
+            break;                                                             \
+        }                                                                      \
     }
-#define sentry__spinlock_unlock(spinlock_ref) (*spinlock_ref = 0)
+#define sentry__spinlock_unlock(spinlock_ref)                                  \
+    (__atomic_store_n(spinlock_ref, 0, __ATOMIC_RELEASE))
 
 #endif
diff --git a/src/sentry_value.c b/src/sentry_value.c
index 3d6e3a08d..f42ea1860 100644
--- a/src/sentry_value.c
+++ b/src/sentry_value.c
@@ -1270,8 +1270,7 @@ sentry_value_t
 sentry__value_new_addr(uint64_t addr)
 {
     char buf[32];
-    size_t written = (size_t)snprintf(
-        buf, sizeof(buf), "0x%llx", (unsigned long long)addr);
+    size_t written = (size_t)snprintf(buf, sizeof(buf), "0x%" PRIx64, addr);
     if (written >= sizeof(buf)) {
         return sentry_value_new_null();
     }
diff --git a/src/unwinder/sentry_unwinder.c b/src/unwinder/sentry_unwinder.c
index 19affe8fe..09664d015 100644
--- a/src/unwinder/sentry_unwinder.c
+++ b/src/unwinder/sentry_unwinder.c
@@ -16,6 +16,7 @@ DEFINE_UNWINDER(libunwindstack);
 DEFINE_UNWINDER(libbacktrace);
 DEFINE_UNWINDER(dbghelp);
 DEFINE_UNWINDER(libunwind);
+DEFINE_UNWINDER(libunwind_mac);
 DEFINE_UNWINDER(psunwind);
 
 static size_t
@@ -34,6 +35,9 @@ unwind_stack(
 #ifdef SENTRY_WITH_UNWINDER_LIBUNWIND
     TRY_UNWINDER(libunwind);
 #endif
+#ifdef SENTRY_WITH_UNWINDER_LIBUNWIND_MAC
+    TRY_UNWINDER(libunwind_mac);
+#endif
 #ifdef SENTRY_WITH_UNWINDER_PS
     TRY_UNWINDER(psunwind);
 #endif
diff --git a/src/unwinder/sentry_unwinder.h b/src/unwinder/sentry_unwinder.h
new file mode 100644
index 000000000..889d20ca1
--- /dev/null
+++ b/src/unwinder/sentry_unwinder.h
@@ -0,0 +1,8 @@
+#ifndef SENTRY_UNWINDER_H_INCLUDED
+#define SENTRY_UNWINDER_H_INCLUDED
+
+typedef struct {
+    uintptr_t lo, hi;
+} mem_range_t;
+
+#endif // SENTRY_UNWINDER_H_INCLUDED
diff --git a/src/unwinder/sentry_unwinder_libbacktrace.c b/src/unwinder/sentry_unwinder_libbacktrace.c
index dd671a958..fbfb0f895 100644
--- a/src/unwinder/sentry_unwinder_libbacktrace.c
+++ b/src/unwinder/sentry_unwinder_libbacktrace.c
@@ -2,7 +2,7 @@
 
 // XXX: Make into a CMake check
 // XXX: IBM i PASE offers libbacktrace in libutil, but not available in AIX
-#if defined(SENTRY_PLATFORM_DARWIN) || defined(__GLIBC__) || defined(__PASE__)
+#if defined(__GLIBC__) || defined(__PASE__)
 #    define HAS_EXECINFO_H
 #endif
 
@@ -14,15 +14,7 @@ size_t
 sentry__unwind_stack_libbacktrace(
     void *addr, const sentry_ucontext_t *uctx, void **ptrs, size_t max_frames)
 {
-    if (addr) {
-#if defined(SENTRY_PLATFORM_MACOS) && defined(MAC_OS_X_VERSION_10_14)          \
-    && __has_builtin(__builtin_available)
-        if (__builtin_available(macOS 10.14, *)) {
-            return (size_t)backtrace_from_fp(addr, ptrs, (int)max_frames);
-        }
-#endif
-        return 0;
-    } else if (uctx) {
+    if (addr || uctx) {
         return 0;
     }
 #ifdef HAS_EXECINFO_H
diff --git a/src/unwinder/sentry_unwinder_libunwind.c b/src/unwinder/sentry_unwinder_libunwind.c
index 844558dd7..27439c20c 100644
--- a/src/unwinder/sentry_unwinder_libunwind.c
+++ b/src/unwinder/sentry_unwinder_libunwind.c
@@ -1,7 +1,40 @@
 #include "sentry_boot.h"
 #include "sentry_logger.h"
+#include "sentry_unwinder.h"
 #define UNW_LOCAL_ONLY
 #include <libunwind.h>
+#include <stdio.h>
+
+/**
+ * Looks up the memory range for a given pointer in /proc/self/maps.
+ * `range` is an output parameter. Returns `true` if a range was found.
+ * Note: it is safe to use this function as long as we are running in a healthy
+ * thread or in the handler thread. The function is unsafe for a signal handler.
+ */
+static bool
+find_mem_range(uintptr_t ptr, mem_range_t *range)
+{
+    bool found = false;
+    FILE *fp = fopen("/proc/self/maps", "r");
+    if (!fp) {
+        return found;
+    }
+    char line[512];
+    while (fgets(line, sizeof line, fp)) {
+        unsigned long lo, hi;
+        if (sscanf(line, "%lx-%lx", &lo, &hi) == 2) {
+            // our bounds are [lo, hi)
+            if (ptr >= lo && ptr < hi) {
+                range->lo = (uintptr_t)lo;
+                range->hi = (uintptr_t)hi;
+                found = true;
+                break;
+            }
+        }
+    }
+    fclose(fp);
+    return found;
+}
 
 size_t
 sentry__unwind_stack_libunwind(
@@ -21,7 +54,16 @@ sentry__unwind_stack_libunwind(
         }
     } else {
         unw_context_t uc;
+#ifdef __clang__
+// This pragma is required to build with Werror on ARM64 Ubuntu
+#    pragma clang diagnostic push
+#    pragma clang diagnostic ignored                                           \
+        "-Wgnu-statement-expression-from-macro-expansion"
+#endif
         int ret = unw_getcontext(&uc);
+#ifdef __clang__
+#    pragma clang diagnostic pop
+#endif
         if (ret != 0) {
             SENTRY_WARN("Failed to retrieve context with libunwind");
             return 0;
@@ -34,14 +76,46 @@ sentry__unwind_stack_libunwind(
         }
     }
 
-    size_t frame_idx = 0;
-    while (unw_step(&cursor) > 0 && frame_idx < max_frames - 1) {
-        unw_word_t ip = 0;
-        unw_get_reg(&cursor, UNW_REG_IP, &ip);
-        ptrs[frame_idx] = (void *)ip;
-        unw_word_t sp = 0;
-        unw_get_reg(&cursor, UNW_REG_SP, &sp);
-        frame_idx++;
+    size_t n = 0;
+    // get the first frame and stack pointer
+    unw_word_t ip = 0;
+    if (unw_get_reg(&cursor, UNW_REG_IP, &ip) < 0) {
+        return n;
+    }
+    if (n < max_frames) {
+        ptrs[n++] = (void *)ip;
+    }
+    unw_word_t sp = 0;
+    (void)unw_get_reg(&cursor, UNW_REG_SP, &sp);
+
+    // ensure we have a valid stack pointer otherwise we only send the top frame
+    mem_range_t stack = { 0, 0 };
+    if (uctx && !find_mem_range((uintptr_t)sp, &stack)) {
+        SENTRY_WARNF("unwinder: SP (%p) is in unmapped memory likely due to "
+                     "stack overflow",
+            (void *)sp);
+        return n;
+    }
+
+    // walk the callers
+    unw_word_t prev_ip = ip;
+    unw_word_t prev_sp = sp;
+    while (n < max_frames && unw_step(&cursor) > 0) {
+        // stop the walk if we fail to read IP
+        if (unw_get_reg(&cursor, UNW_REG_IP, &ip) < 0) {
+            break;
+        }
+        // SP is optional for progress
+        (void)unw_get_reg(&cursor, UNW_REG_SP, &sp);
+
+        // stop the walk if there is _no_ progress
+        if (ip == prev_ip && sp == prev_sp) {
+            break;
+        }
+
+        prev_ip = ip;
+        prev_sp = sp;
+        ptrs[n++] = (void *)ip;
     }
-    return frame_idx + 1;
+    return n;
 }
diff --git a/src/unwinder/sentry_unwinder_libunwind_mac.c b/src/unwinder/sentry_unwinder_libunwind_mac.c
new file mode 100644
index 000000000..b9a909828
--- /dev/null
+++ b/src/unwinder/sentry_unwinder_libunwind_mac.c
@@ -0,0 +1,207 @@
+#include "sentry_boot.h"
+#include "sentry_logger.h"
+#include "sentry_unwinder.h"
+#include <libunwind.h>
+
+#if defined(SENTRY_PLATFORM_MACOS)
+#    include <mach/mach.h>
+#    include <mach/mach_vm.h>
+#endif
+
+#if defined(SENTRY_PLATFORM_MACOS)
+// Basic pointer validation to make sure we stay inside mapped memory.
+static bool
+is_readable_ptr(uintptr_t p, size_t size)
+{
+    if (!p) {
+        return false;
+    }
+
+    mach_vm_address_t address = (mach_vm_address_t)p;
+    mach_vm_size_t region_size = 0;
+    vm_region_basic_info_data_64_t info;
+    mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
+    mach_port_t object = MACH_PORT_NULL;
+
+    kern_return_t kr = mach_vm_region(mach_task_self(), &address, &region_size,
+        VM_REGION_BASIC_INFO_64, (vm_region_info_t)&info, &count, &object);
+    if (object != MACH_PORT_NULL) {
+        mach_port_deallocate(mach_task_self(), object);
+    }
+    if (kr != KERN_SUCCESS) {
+        return false;
+    }
+
+    if (!(info.protection & VM_PROT_READ)) {
+        return false;
+    }
+
+    mem_range_t vm_region
+        = { (uintptr_t)address, (uintptr_t)address + (uintptr_t)region_size };
+    if (vm_region.hi < vm_region.lo) {
+        return false;
+    }
+
+    uintptr_t end = p + (uintptr_t)size;
+    if (end < p) {
+        return false;
+    }
+
+    return p >= vm_region.lo && end <= vm_region.hi;
+}
+#endif
+
+static bool
+valid_ptr(uintptr_t p)
+{
+    if (!p || (p % sizeof(uintptr_t)) != 0) {
+        return false;
+    }
+
+#if defined(SENTRY_PLATFORM_MACOS)
+    return is_readable_ptr(p, sizeof(uintptr_t) * 2);
+#else
+    return true;
+#endif
+}
+
+/**
+ * This does the same frame-pointer walk for arm64 and x86_64, with the only
+ * difference being which registers value is used as frame-pointer (fp vs rbp)
+ */
+static void
+fp_walk(uintptr_t fp, size_t *n, void **ptrs, size_t max_frames)
+{
+    while (*n < max_frames) {
+        if (!valid_ptr(fp)) {
+            break;
+        }
+
+        // arm64 frame record layout: [prev_fp, saved_lr] at fp and fp+8
+        // x86_64 frame record layout: [prev_rbp, saved_retaddr] at bp and bp+8
+        const uintptr_t *record = (uintptr_t *)fp;
+        const uintptr_t next_fp = record[0];
+        const uintptr_t ret_addr = record[1];
+        if (!valid_ptr(next_fp) || !ret_addr) {
+            break;
+        }
+
+        ptrs[(*n)++] = (void *)(ret_addr - 1);
+        if (next_fp <= fp) {
+            break; // prevent loops
+        }
+        fp = next_fp;
+    }
+}
+
+static size_t
+fp_walk_from_uctx(const sentry_ucontext_t *uctx, void **ptrs, size_t max_frames)
+{
+    size_t n = 0;
+    struct __darwin_mcontext64 *mctx = uctx->user_context->uc_mcontext;
+#if defined(__arm64__)
+    uintptr_t pc = (uintptr_t)mctx->__ss.__pc;
+    uintptr_t fp = (uintptr_t)mctx->__ss.__fp;
+    uintptr_t lr = (uintptr_t)mctx->__ss.__lr;
+
+    // top frame: adjust pc−1 so it symbolizes inside the function
+    if (pc && n < max_frames) {
+        ptrs[n++] = (void *)(pc - 1);
+    }
+
+    // next frame is from saved LR at current FP record
+    if (lr && n < max_frames) {
+        ptrs[n++] = (void *)(lr - 1);
+    }
+
+    fp_walk(fp, &n, ptrs, max_frames);
+#elif defined(__x86_64__)
+    uintptr_t ip = (uintptr_t)mctx->__ss.__rip;
+    uintptr_t bp = (uintptr_t)mctx->__ss.__rbp;
+
+    // top frame: adjust ip−1 so it symbolizes inside the function
+    if (ip && n < max_frames) {
+        ptrs[n++] = (void *)(ip - 1);
+    }
+
+    fp_walk(bp, &n, ptrs, max_frames);
+#else
+#    error "Unsupported CPU architecture for macOS stackwalker"
+#endif
+    return n;
+}
+
+size_t
+sentry__unwind_stack_libunwind_mac(
+    void *addr, const sentry_ucontext_t *uctx, void **ptrs, size_t max_frames)
+{
+    if (addr) {
+        size_t n = 0;
+        fp_walk((uintptr_t)addr, &n, ptrs, max_frames);
+        return n;
+    }
+
+    if (uctx) {
+        return fp_walk_from_uctx(uctx, ptrs, max_frames);
+    }
+
+    // fall back on the system `libunwind` for stack-traces "from call-site"
+    unw_context_t uc;
+    int ret = unw_getcontext(&uc);
+    if (ret != 0) {
+        SENTRY_WARN("Failed to retrieve context with libunwind");
+        return 0;
+    }
+
+    unw_cursor_t cursor;
+    ret = unw_init_local(&cursor, &uc);
+    if (ret != 0) {
+        SENTRY_WARN("Failed to initialize libunwind with local context");
+        return 0;
+    }
+    size_t n = 0;
+    // get the first frame
+    if (n < max_frames) {
+        unw_word_t ip = 0;
+        if (unw_get_reg(&cursor, UNW_REG_IP, &ip) >= 0) {
+#if defined(__arm64__)
+            // Strip pointer authentication, for some reason ptrauth_strip() not
+            // working
+            // https://developer.apple.com/documentation/security/preparing_your_app_to_work_with_pointer_authentication
+            ip &= 0x7fffffffffffull;
+#endif
+            ptrs[n++] = (void *)ip;
+        } else {
+            return 0;
+        }
+    }
+    // walk the callers
+    unw_word_t prev_ip = (uintptr_t)ptrs[0];
+    unw_word_t prev_sp = 0;
+    (void)unw_get_reg(&cursor, UNW_REG_SP, &prev_sp);
+    while (n < max_frames && unw_step(&cursor) > 0) {
+        unw_word_t ip = 0, sp = 0;
+        // stop the walk if we fail to read IP
+        if (unw_get_reg(&cursor, UNW_REG_IP, &ip) < 0) {
+            break;
+        }
+        // SP is optional for progress
+        (void)unw_get_reg(&cursor, UNW_REG_SP, &sp);
+
+        // stop the walk if there is _no_ progress
+        if (ip == prev_ip && sp == prev_sp) {
+            break;
+        }
+#if defined(__arm64__)
+        // Strip pointer authentication, for some reason ptrauth_strip() not
+        // working
+        // https://developer.apple.com/documentation/security/preparing_your_app_to_work_with_pointer_authentication
+        ip &= 0x7fffffffffffull;
+#endif
+        prev_ip = ip;
+        prev_sp = sp;
+        ptrs[n++] = (void *)ip;
+    }
+
+    return n;
+}
diff --git a/tests/assertions.py b/tests/assertions.py
index 05d3ea320..d40311c18 100644
--- a/tests/assertions.py
+++ b/tests/assertions.py
@@ -127,11 +127,11 @@ def assert_event_meta(
             match = VERSION_RE.match(version)
             version = match.group(1)
             build = match.group(2)
+            expected_os_context = {"name": "Linux", "version": version}
+            if build:
+                expected_os_context["build"] = build
 
-            assert_matches(
-                event["contexts"]["os"],
-                {"name": "Linux", "version": version, "build": build},
-            )
+            assert_matches(event["contexts"]["os"], expected_os_context)
             assert "distribution_name" in event["contexts"]["os"]
             assert "distribution_version" in event["contexts"]["os"]
         elif sys.platform == "darwin":
@@ -170,6 +170,16 @@ def assert_event_meta(
         )
 
 
+def is_valid_hex(s):
+    if not s.lower().startswith("0x"):
+        return False
+    try:
+        int(s, 0)
+        return True
+    except ValueError:
+        return False
+
+
 def assert_stacktrace(
     envelope, inside_exception=False, check_size=True, check_package=False
 ):
@@ -181,7 +191,7 @@ def assert_stacktrace(
 
     if check_size:
         assert len(frames) > 0
-        assert all(frame["instruction_addr"].startswith("0x") for frame in frames)
+        assert all(is_valid_hex(frame["instruction_addr"]) for frame in frames)
         assert any(
             frame.get("function") is not None and frame.get("package") is not None
             for frame in frames
diff --git a/tests/cmake.py b/tests/cmake.py
index d49d0243f..62484910a 100644
--- a/tests/cmake.py
+++ b/tests/cmake.py
@@ -154,9 +154,7 @@ def cmake(cwd, targets, options=None, cflags=None):
     config_cmd = cmake.copy()
 
     if os.environ.get("VS_GENERATOR_TOOLSET") == "ClangCL":
-        config_cmd.append("-G Visual Studio 17 2022")
-        config_cmd.append("-A x64")
-        config_cmd.append("-T ClangCL")
+        configure_clang_cl(config_cmd)
 
     for key, value in options.items():
         config_cmd.append("-D{}={}".format(key, value))
@@ -167,17 +165,7 @@ def cmake(cwd, targets, options=None, cflags=None):
     if "asan" in os.environ.get("RUN_ANALYZER", ""):
         config_cmd.append("-DWITH_ASAN_OPTION=ON")
     if "tsan" in os.environ.get("RUN_ANALYZER", ""):
-        module_dir = Path(__file__).resolve().parent
-        tsan_options = {
-            "verbosity": 2,
-            "detect_deadlocks": 1,
-            "second_deadlock_stack": 1,
-            "suppressions": module_dir / "tsan.supp",
-        }
-        os.environ["TSAN_OPTIONS"] = ":".join(
-            f"{key}={value}" for key, value in tsan_options.items()
-        )
-        config_cmd.append("-DWITH_TSAN_OPTION=ON")
+        configure_tsan(config_cmd)
 
     # we have to set `-Werror` for this cmake invocation only, otherwise
     # completely unrelated things will break
@@ -190,39 +178,7 @@ def cmake(cwd, targets, options=None, cflags=None):
     if "gcc" in os.environ.get("RUN_ANALYZER", ""):
         cflags.append("-fanalyzer")
     if "llvm-cov" in os.environ.get("RUN_ANALYZER", ""):
-        if False and os.environ.get("VS_GENERATOR_TOOLSET") == "ClangCL":
-            # for clang-cl in CI we have to use `--coverage` rather than `fprofile-instr-generate` and provide an
-            # architecture-specific profiling library for it work:
-            # TODO: This currently doesn't work due to https://gitlab.kitware.com/cmake/cmake/-/issues/24025
-            #       The issue describes a behavior where generated object-name is specified via `/fo` using a target
-            #       directory, rather than a file-name (this is CMake behavior). While the `clang-cl` suggest that this
-            #       is supported the flag produces `.gcda` and `.gcno` files, which have no relation with the object-
-            #       file and which leads to failure to accumulate coverage data.
-            #       This would have to be fixed in clang-cl: https://github.com/llvm/llvm-project/issues/87304
-            #       Let's leave this in here for posterity, it would be great to get coverage analysis on Windows.
-            flags = "--coverage"
-            profile_lib = "clang_rt.profile-x86_64.lib"
-            config_cmd.append(f"-DCMAKE_EXE_LINKER_FLAGS='{profile_lib}'")
-            config_cmd.append(f"-DCMAKE_SHARED_LINKER_FLAGS='{profile_lib}'")
-            config_cmd.append(f"-DCMAKE_MODULE_LINKER_FLAGS='{profile_lib}'")
-        else:
-            flags = "-fprofile-instr-generate -fcoverage-mapping"
-        config_cmd.append("-DCMAKE_C_FLAGS='{}'".format(flags))
-
-        # Since we overwrite `CXXFLAGS` below, we must add the experimental library here for the GHA runner that builds
-        # sentry-native with LLVM clang for macOS (to run ASAN on macOS) rather than the version coming with XCode.
-        # TODO: remove this if the GHA runner image for macOS ever updates beyond llvm15.
-        if (
-            sys.platform == "darwin"
-            and os.environ.get("CC", "") == "clang"
-            and shutil.which("clang") == "/usr/local/opt/llvm@15/bin/clang"
-        ):
-            flags = (
-                flags
-                + " -L/usr/local/opt/llvm@15/lib/c++ -fexperimental-library -Wno-unused-command-line-argument"
-            )
-
-        config_cmd.append("-DCMAKE_CXX_FLAGS='{}'".format(flags))
+        configure_llvm_cov(config_cmd)
     if "CMAKE_DEFINES" in os.environ:
         config_cmd.extend(os.environ.get("CMAKE_DEFINES").split())
     env = dict(os.environ)
@@ -232,9 +188,18 @@ def cmake(cwd, targets, options=None, cflags=None):
 
     print("\n{} > {}".format(cwd, " ".join(config_cmd)), flush=True)
     try:
-        subprocess.run(config_cmd, cwd=cwd, env=env, check=True)
-    except subprocess.CalledProcessError:
-        raise pytest.fail.Exception("cmake configure failed") from None
+        result = subprocess.run(
+            config_cmd, cwd=cwd, env=env, check=True, capture_output=True, text=True
+        )
+        sys.stdout.write(result.stdout)
+        sys.stderr.write(result.stderr or "")
+    except subprocess.CalledProcessError as e:
+        if "Could NOT find ZLIB" in e.stderr:
+            pytest.skip("ZLIB not found")
+        else:
+            sys.stdout.write(e.stdout)
+            sys.stderr.write(e.stderr or "")
+            raise pytest.fail.Exception("cmake configure failed") from None
 
     # CodeChecker invocations and options are documented here:
     # https://github.com/Ericsson/codechecker/blob/master/docs/analyzer/user_guide.md
@@ -317,3 +282,59 @@ def cmake(cwd, targets, options=None, cflags=None):
             cwd=cwd,
             check=True,
         )
+
+
+def configure_clang_cl(config_cmd: list[str]):
+    config_cmd.append("-G Visual Studio 17 2022")
+    config_cmd.append("-A x64")
+    config_cmd.append("-T ClangCL")
+
+
+def configure_tsan(config_cmd: list[str]):
+    module_dir = Path(__file__).resolve().parent
+    tsan_options = {
+        "verbosity": 2,
+        "detect_deadlocks": 1,
+        "second_deadlock_stack": 1,
+        "suppressions": module_dir / "tsan.supp",
+    }
+    os.environ["TSAN_OPTIONS"] = ":".join(
+        f"{key}={value}" for key, value in tsan_options.items()
+    )
+    config_cmd.append("-DWITH_TSAN_OPTION=ON")
+
+
+def configure_llvm_cov(config_cmd: list[str]):
+    if False and os.environ.get("VS_GENERATOR_TOOLSET") == "ClangCL":
+        # for clang-cl in CI we have to use `--coverage` rather than `fprofile-instr-generate` and provide an
+        # architecture-specific profiling library for it work:
+        # TODO: This currently doesn't work due to https://gitlab.kitware.com/cmake/cmake/-/issues/24025
+        #       The issue describes a behavior where generated object-name is specified via `/fo` using a target
+        #       directory, rather than a file-name (this is CMake behavior). While the `clang-cl` suggest that this
+        #       is supported the flag produces `.gcda` and `.gcno` files, which have no relation with the object-
+        #       file and which leads to failure to accumulate coverage data.
+        #       This would have to be fixed in clang-cl: https://github.com/llvm/llvm-project/issues/87304
+        #       Let's leave this in here for posterity, it would be great to get coverage analysis on Windows.
+        flags = "--coverage"
+        profile_lib = "clang_rt.profile-x86_64.lib"
+        config_cmd.append(f"-DCMAKE_EXE_LINKER_FLAGS='{profile_lib}'")
+        config_cmd.append(f"-DCMAKE_SHARED_LINKER_FLAGS='{profile_lib}'")
+        config_cmd.append(f"-DCMAKE_MODULE_LINKER_FLAGS='{profile_lib}'")
+    else:
+        flags = "-fprofile-instr-generate -fcoverage-mapping"
+    config_cmd.append("-DCMAKE_C_FLAGS='{}'".format(flags))
+
+    # Since we overwrite `CXXFLAGS` below, we must add the experimental library here for the GHA runner that builds
+    # sentry-native with LLVM clang for macOS (to run ASAN on macOS) rather than the version coming with XCode.
+    # TODO: remove this if the GHA runner image for macOS ever updates beyond llvm15.
+    if (
+        sys.platform == "darwin"
+        and os.environ.get("CC", "") == "clang"
+        and shutil.which("clang") == "/usr/local/opt/llvm@15/bin/clang"
+    ):
+        flags = (
+            flags
+            + " -L/usr/local/opt/llvm@15/lib/c++ -fexperimental-library -Wno-unused-command-line-argument"
+        )
+
+    config_cmd.append("-DCMAKE_CXX_FLAGS='{}'".format(flags))
diff --git a/tests/test_dotnet_signals.py b/tests/test_dotnet_signals.py
index 9d7fe07c9..7c4e2a70d 100644
--- a/tests/test_dotnet_signals.py
+++ b/tests/test_dotnet_signals.py
@@ -65,6 +65,9 @@ def run_dotnet_native_crash(tmp_path):
     reason="dotnet signal handling is currently only supported on 64-bit Linux without sanitizers",
 )
 def test_dotnet_signals_inproc(cmake):
+    if shutil.which("dotnet") is None:
+        pytest.skip("dotnet is not installed")
+
     try:
         # build native client library with inproc and the example for crash dumping
         tmp_path = cmake(
@@ -166,6 +169,9 @@ def run_aot_native_crash(tmp_path):
     reason="dotnet AOT signal handling is currently only supported on 64-bit Linux without sanitizers",
 )
 def test_aot_signals_inproc(cmake):
+    if shutil.which("dotnet") is None:
+        pytest.skip("dotnet is not installed")
+
     try:
         # build native client library with inproc and the example for crash dumping
         tmp_path = cmake(
diff --git a/tests/test_integration_stdout.py b/tests/test_integration_stdout.py
index fdded435a..d7ed20ccb 100644
--- a/tests/test_integration_stdout.py
+++ b/tests/test_integration_stdout.py
@@ -240,7 +240,7 @@ def test_inproc_crash_stdout_before_send_and_on_crash(cmake):
 )
 def test_inproc_stack_overflow_stdout(cmake, build_args):
     tmp_path, output = run_stdout_for(
-        "inproc", cmake, ["attachment", "stack-overflow"], build_args
+        "inproc", cmake, ["log", "attachment", "stack-overflow"], build_args
     )
 
     envelope = Envelope.deserialize(output)
diff --git a/tests/test_unit.py b/tests/test_unit.py
index 7ce28d04f..c38fffb91 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -17,7 +17,11 @@ def test_unit(cmake, unittest):
 
 @pytest.mark.skipif(not has_http, reason="tests need http transport")
 def test_unit_transport(cmake, unittest):
-    if unittest in ["custom_logger", "logger_enable_disable_functionality"]:
+    if unittest in [
+        "custom_logger",
+        "logger_enable_disable_functionality",
+        "logger_level",
+    ]:
         pytest.skip("excluded from transport test-suite")
 
     cwd = cmake(
diff --git a/tests/unit/test_process.c b/tests/unit/test_process.c
index baeff7b20..8fed7546e 100644
--- a/tests/unit/test_process.c
+++ b/tests/unit/test_process.c
@@ -24,6 +24,20 @@ SENTRY_TEST(process_invalid)
     sentry__path_free(nul);
 }
 
+#ifndef SENTRY_PLATFORM_WINDOWS
+void
+find_cp_path(char *buf, size_t buf_len)
+{
+    FILE *fp = popen("command -v cp 2>/dev/null", "r");
+    if (fp && fgets(buf, buf_len, fp)) {
+        buf[strcspn(buf, "\n")] = 0; // strip newline
+    }
+    if (fp) {
+        pclose(fp);
+    }
+}
+#endif
+
 SENTRY_TEST(process_spawn)
 {
 #if defined(SENTRY_PLATFORM_WINDOWS) || defined(SENTRY_PLATFORM_MACOS)         \
@@ -46,8 +60,10 @@ SENTRY_TEST(process_spawn)
     sentry__process_spawn(cmd, "/C", "copy", exe->path, dst->path, NULL);
     sentry__path_free(cmd);
 #    else
-    // /bin/cp <src> <dst>
-    sentry_path_t *cp = sentry__path_from_str("/bin/cp");
+    char cp_path[512] = "/bin/cp";
+    find_cp_path(cp_path, sizeof(cp_path));
+    // cp <src> <dst>
+    sentry_path_t *cp = sentry__path_from_str(cp_path);
     TEST_ASSERT(!!cp);
     sentry__process_spawn(cp, exe->path, dst->path, NULL);
     sentry__path_free(cp);
diff --git a/tests/valgrind.txt b/tests/valgrind.txt
index 92b756580..aaf7a10b5 100644
--- a/tests/valgrind.txt
+++ b/tests/valgrind.txt
@@ -37,3 +37,11 @@
     fun:sentry__bgworker_start
     fun:test_sentry_task_queue
 }
+{
+    # writes of uninit buffers with padding or foreign-stack pointers lead to false-positive during stack unwinding
+   libunwind writes buffers we do not manage
+   Memcheck:Param
+   write(buf)
+   ...
+   obj:*/libunwind.so*
+}
\ No newline at end of file