Skip to content

Commit

Permalink
Merge pull request #5549 from benesch/overflow-backtraces
Browse files Browse the repository at this point in the history
materialized: attempt to produce nice backtraces on stack overflow
  • Loading branch information
benesch authored Feb 2, 2021
2 parents 6725cfc + f11f749 commit a1438d9
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 2 deletions.
6 changes: 4 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions src/materialized/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,10 @@ include_dir = "0.6.0"
itertools = "0.9.0"
krb5-src = { version = "0.2.3", features = ["binaries"] }
lazy_static = "1.4.0"
libc = "0.2.84"
log = "0.4.13"
mz-process-collector = { path = "../mz-process-collector" }
nix = "0.19.1"
num_cpus = "1.0.0"
openssl = { version = "0.10.32", features = ["vendored"] }
openssl-sys = { version = "0.9.59", features = ["vendored"] }
Expand Down
1 change: 1 addition & 0 deletions src/materialized/src/bin/materialized/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ fn main() {

fn run(args: Args) -> Result<(), anyhow::Error> {
panic::set_hook(Box::new(handle_panic));
sys::enable_sigsegv_backtraces()?;

if args.version > 0 {
println!("materialized {}", materialized::BUILD_INFO.human_version());
Expand Down
82 changes: 82 additions & 0 deletions src/materialized/src/bin/materialized/sys.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@

//! System support functions.
use std::alloc::{self, Layout};
use std::ptr;

use anyhow::{bail, Context};
use log::{trace, warn};
use nix::errno;
use nix::sys::signal;

#[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "ios")))]
pub fn adjust_rlimits() {
Expand Down Expand Up @@ -90,3 +96,79 @@ pub fn adjust_rlimits() {
)
}
}

/// Attempts to enable backtraces when SIGSEGV occurs.
///
/// In particular, this means producing backtraces on stack overflow, as
/// stack overflow raises SIGSEGV. The approach here involves making system
/// calls to handle SIGSEGV on an alternate signal stack, which seems to work
/// well in practice but may technically be undefined behavior.
///
/// Rust may someday do this by default.
/// Follow: https://github.com/rust-lang/rust/issues/51405.
pub fn enable_sigsegv_backtraces() -> Result<(), anyhow::Error> {
// This code is derived from the code in the backtrace-on-stack-overflow
// crate, which is freely available under the terms of the Apache 2.0
// license. The modifications here provide better error messages if any of
// the various system calls fail.
//
// See: https://github.com/matklad/backtrace-on-stack-overflow

// NOTE(benesch): The stack size was chosen to match the default Rust thread
// stack size of 2MiB. Probably overkill, but we'd much rather have
// backtraces on stack overflow than squabble over a few megabytes. Using
// libc::SIGSTKSZ is tempting, but its default of 8KiB on my system makes me
// nervous. Rust code isn't used to running with a stack that small.
const STACK_SIZE: usize = 2 << 20;

// x86_64 and aarch64 require 16-byte alignment. Its hard to imagine other
// platforms that would have more stringent requirements.
const STACK_ALIGN: usize = 16;

// Allocate a stack.
let buf_layout =
Layout::from_size_align(STACK_SIZE, STACK_ALIGN).expect("layout known to be valid");
// SAFETY: layout has non-zero size and the uninitialized memory that is
// returned is never read (at least, not by Rust).
let buf = unsafe { alloc::alloc(buf_layout) };

// Request that signals be delivered to this alternate stack.
let stack = libc::stack_t {
ss_sp: buf as *mut libc::c_void,
ss_flags: 0,
ss_size: STACK_SIZE,
};
// SAFETY: `stack` is a valid pointer to a `stack_t` object and the second
// parameter, `old_ss`, is permitted to be `NULL` according to POSIX.
let ret = unsafe { libc::sigaltstack(&stack, ptr::null_mut()) };
if ret == -1 {
let errno = errno::from_i32(errno::errno());
bail!("failed to configure alternate signal stack: {}", errno);
}

// Install a handler for SIGSEGV.
let action = signal::SigAction::new(
signal::SigHandler::Handler(handle_sigsegv),
signal::SaFlags::SA_NODEFER | signal::SaFlags::SA_ONSTACK,
signal::SigSet::empty(),
);
// SAFETY: see `handle_sigsegv`.
unsafe { signal::sigaction(signal::SIGSEGV, &action) }
.context("failed to install SIGSEGV handler")?;

Ok(())
}

extern "C" fn handle_sigsegv(_: i32) {
// SAFETY: this is is a signal handler function and technically must be
// "async-signal safe" [0]. That typically means no memory allocation, which
// means no panics or backtraces... but if we're here, we're already doomed
// by a segfault. So there is little harm to ignoring the rules and
// panicking. If we're successful, as we often are, the panic will be caught
// by our panic handler and displayed nicely with a backtrace that traces
// *through* the signal handler and includes the frames that led to the
// SIGSEGV.
//
// [0]: https://man7.org/linux/man-pages/man7/signal-safety.7.html
panic!("stack overflow");
}

0 comments on commit a1438d9

Please sign in to comment.