Skip to content

Commit

Permalink
runtime: let the fault thread to crash the process
Browse files Browse the repository at this point in the history
Let the fault thread to crash the program to make sure while gdb coredump file could see the correct backtrace in the number one thread in gdb.

Fixes #63277.

Change-Id: Ie4473f76f0feba596091433918bcd35a4ff7e11b
GitHub-Last-Rev: f4615c2
GitHub-Pull-Request: #63666
Reviewed-on: https://go-review.googlesource.com/c/go/+/536895
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
  • Loading branch information
zzkcode authored and cherrymui committed Dec 2, 2023
1 parent 58bfef8 commit de5b418
Show file tree
Hide file tree
Showing 2 changed files with 206 additions and 47 deletions.
216 changes: 180 additions & 36 deletions src/runtime/runtime-gdb_unix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,43 @@ import (
"testing"
)

func canGenerateCore(t *testing.T) bool {
// Ensure there is enough RLIMIT_CORE available to generate a full core.
var lim syscall.Rlimit
err := syscall.Getrlimit(syscall.RLIMIT_CORE, &lim)
if err != nil {
t.Fatalf("error getting rlimit: %v", err)
}
// Minimum RLIMIT_CORE max to allow. This is a conservative estimate.
// Most systems allow infinity.
const minRlimitCore = 100 << 20 // 100 MB
if lim.Max < minRlimitCore {
t.Skipf("RLIMIT_CORE max too low: %#+v", lim)
}

// Make sure core pattern will send core to the current directory.
b, err := os.ReadFile("/proc/sys/kernel/core_pattern")
if err != nil {
t.Fatalf("error reading core_pattern: %v", err)
}
if string(b) != "core\n" {
t.Skipf("Unexpected core pattern %q", string(b))
}

coreUsesPID := false
b, err = os.ReadFile("/proc/sys/kernel/core_uses_pid")
if err == nil {
switch string(bytes.TrimSpace(b)) {
case "0":
case "1":
coreUsesPID = true
default:
t.Skipf("unexpected core_uses_pid value %q", string(b))
}
}
return coreUsesPID
}

const coreSignalSource = `
package main
Expand Down Expand Up @@ -81,45 +118,12 @@ func TestGdbCoreSignalBacktrace(t *testing.T) {
t.Parallel()
checkGdbVersion(t)

// Ensure there is enough RLIMIT_CORE available to generate a full core.
var lim syscall.Rlimit
err := syscall.Getrlimit(syscall.RLIMIT_CORE, &lim)
if err != nil {
t.Fatalf("error getting rlimit: %v", err)
}
// Minimum RLIMIT_CORE max to allow. This is a conservative estimate.
// Most systems allow infinity.
const minRlimitCore = 100 << 20 // 100 MB
if lim.Max < minRlimitCore {
t.Skipf("RLIMIT_CORE max too low: %#+v", lim)
}

// Make sure core pattern will send core to the current directory.
b, err := os.ReadFile("/proc/sys/kernel/core_pattern")
if err != nil {
t.Fatalf("error reading core_pattern: %v", err)
}
if string(b) != "core\n" {
t.Skipf("Unexpected core pattern %q", string(b))
}

coreUsesPID := false
b, err = os.ReadFile("/proc/sys/kernel/core_uses_pid")
if err == nil {
switch string(bytes.TrimSpace(b)) {
case "0":
case "1":
coreUsesPID = true
default:
t.Skipf("unexpected core_uses_pid value %q", string(b))
}
}

dir := t.TempDir()
coreUsesPID := canGenerateCore(t)

// Build the source code.
dir := t.TempDir()
src := filepath.Join(dir, "main.go")
err = os.WriteFile(src, []byte(coreSignalSource), 0644)
err := os.WriteFile(src, []byte(coreSignalSource), 0644)
if err != nil {
t.Fatalf("failed to create file: %v", err)
}
Expand Down Expand Up @@ -230,3 +234,143 @@ func TestGdbCoreSignalBacktrace(t *testing.T) {
t.Fatalf("could not find runtime symbol in backtrace after signal handler:\n%s", rest)
}
}

const coreCrashThreadSource = `
package main
/*
#cgo CFLAGS: -g -O0
#include <stdio.h>
#include <stddef.h>
void trigger_crash()
{
int* ptr = NULL;
*ptr = 1024;
}
*/
import "C"
import (
"flag"
"fmt"
"os"
"runtime/debug"
"syscall"
)
func enableCore() {
debug.SetTraceback("crash")
var lim syscall.Rlimit
err := syscall.Getrlimit(syscall.RLIMIT_CORE, &lim)
if err != nil {
panic(fmt.Sprintf("error getting rlimit: %v", err))
}
lim.Cur = lim.Max
fmt.Fprintf(os.Stderr, "Setting RLIMIT_CORE = %+#v\n", lim)
err = syscall.Setrlimit(syscall.RLIMIT_CORE, &lim)
if err != nil {
panic(fmt.Sprintf("error setting rlimit: %v", err))
}
}
func main() {
flag.Parse()
enableCore()
C.trigger_crash()
}
`

// TestGdbCoreCrashThreadBacktrace tests that runtime could let the fault thread to crash process
// and make fault thread as number one thread while gdb in a core file
func TestGdbCoreCrashThreadBacktrace(t *testing.T) {
if runtime.GOOS != "linux" {
// N.B. This test isn't fundamentally Linux-only, but it needs
// to know how to enable/find core files on each OS.
t.Skip("Test only supported on Linux")
}
if runtime.GOARCH != "386" && runtime.GOARCH != "amd64" {
// TODO(go.dev/issue/25218): Other architectures use sigreturn
// via VDSO, which we somehow don't handle correctly.
t.Skip("Backtrace through signal handler only works on 386 and amd64")
}

checkGdbEnvironment(t)
t.Parallel()
checkGdbVersion(t)

coreUsesPID := canGenerateCore(t)

// Build the source code.
dir := t.TempDir()
src := filepath.Join(dir, "main.go")
err := os.WriteFile(src, []byte(coreCrashThreadSource), 0644)
if err != nil {
t.Fatalf("failed to create file: %v", err)
}
cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe", "main.go")
cmd.Dir = dir
out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
if err != nil {
t.Fatalf("building source %v\n%s", err, out)
}

// Start the test binary.
cmd = testenv.Command(t, "./a.exe")
cmd.Dir = dir
var output bytes.Buffer
cmd.Stdout = &output // for test logging
cmd.Stderr = &output

if err := cmd.Start(); err != nil {
t.Fatalf("error starting test binary: %v", err)
}

pid := cmd.Process.Pid

err = cmd.Wait()
t.Logf("child output:\n%s", output.String())
if err == nil {
t.Fatalf("Wait succeeded, want SIGABRT")
}
ee, ok := err.(*exec.ExitError)
if !ok {
t.Fatalf("Wait err got %T %v, want exec.ExitError", ee, ee)
}
ws, ok := ee.Sys().(syscall.WaitStatus)
if !ok {
t.Fatalf("Sys got %T %v, want syscall.WaitStatus", ee.Sys(), ee.Sys())
}
if ws.Signal() != syscall.SIGABRT {
t.Fatalf("Signal got %d want SIGABRT", ws.Signal())
}
if !ws.CoreDump() {
t.Fatalf("CoreDump got %v want true", ws.CoreDump())
}

coreFile := "core"
if coreUsesPID {
coreFile += fmt.Sprintf(".%d", pid)
}

// Execute gdb commands.
args := []string{"-nx", "-batch",
"-iex", "add-auto-load-safe-path " + filepath.Join(testenv.GOROOT(t), "src", "runtime"),
"-ex", "backtrace",
filepath.Join(dir, "a.exe"),
filepath.Join(dir, coreFile),
}
cmd = testenv.Command(t, "gdb", args...)

got, err := cmd.CombinedOutput()
t.Logf("gdb output:\n%s", got)
if err != nil {
t.Fatalf("gdb exited with error: %v", err)
}

re := regexp.MustCompile(`#.* trigger_crash`)
if found := re.Find(got) != nil; !found {
t.Fatalf("could not find trigger_crash in backtrace")
}
}
37 changes: 26 additions & 11 deletions src/runtime/signal_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -597,7 +597,7 @@ func adjustSignalStack(sig uint32, mp *m, gsigStack *gsignalStack) bool {

// crashing is the number of m's we have waited for when implementing
// GOTRACEBACK=crash when a signal is received.
var crashing int32
var crashing atomic.Int32

// testSigtrap and testSigusr1 are used by the runtime tests. If
// non-nil, it is called on SIGTRAP/SIGUSR1. If it returns true, the
Expand Down Expand Up @@ -730,7 +730,7 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
mp.throwing = throwTypeRuntime
mp.caughtsig.set(gp)

if crashing == 0 {
if crashing.Load() == 0 {
startpanic_m()
}

Expand All @@ -740,32 +740,47 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
if level > 0 {
goroutineheader(gp)
tracebacktrap(c.sigpc(), c.sigsp(), c.siglr(), gp)
if crashing > 0 && gp != mp.curg && mp.curg != nil && readgstatus(mp.curg)&^_Gscan == _Grunning {
if crashing.Load() > 0 && gp != mp.curg && mp.curg != nil && readgstatus(mp.curg)&^_Gscan == _Grunning {
// tracebackothers on original m skipped this one; trace it now.
goroutineheader(mp.curg)
traceback(^uintptr(0), ^uintptr(0), 0, mp.curg)
} else if crashing == 0 {
} else if crashing.Load() == 0 {
tracebackothers(gp)
print("\n")
}
dumpregs(c)
}

if docrash {
crashing++
if crashing < mcount()-int32(extraMLength.Load()) {
isCrashThread := false
if crashing.CompareAndSwap(0, 1) {
isCrashThread = true
} else {
crashing.Add(1)
}
if crashing.Load() < mcount()-int32(extraMLength.Load()) {
// There are other m's that need to dump their stacks.
// Relay SIGQUIT to the next m by sending it to the current process.
// All m's that have already received SIGQUIT have signal masks blocking
// receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
// When the last m receives the SIGQUIT, it will fall through to the call to
// crash below. Just in case the relaying gets botched, each m involved in
// The first m will wait until all ms received the SIGQUIT, then crash/exit.
// Just in case the relaying gets botched, each m involved in
// the relay sleeps for 5 seconds and then does the crash/exit itself.
// In expected operation, the last m has received the SIGQUIT and run
// crash/exit and the process is gone, all long before any of the
// 5-second sleeps have finished.
// The faulting m is crashing first so it is the faulting thread in the core dump (see issue #63277):
// in expected operation, the first m will wait until the last m has received the SIGQUIT,
// and then run crash/exit and the process is gone.
// However, if it spends more than 5 seconds to send SIGQUIT to all ms,
// any of ms may crash/exit the process after waiting for 5 seconds.
print("\n-----\n\n")
raiseproc(_SIGQUIT)
}
if isCrashThread {
i := 0
for (crashing.Load() < mcount()-int32(extraMLength.Load())) && i < 10 {
i++
usleep(500 * 1000)
}
} else {
usleep(5 * 1000 * 1000)
}
printDebugLog()
Expand Down

0 comments on commit de5b418

Please sign in to comment.