From 45b28530a7e39c5fa50f446669cdcf733c629b46 Mon Sep 17 00:00:00 2001 From: Tim Hockin Date: Thu, 11 Mar 2021 12:55:42 -0800 Subject: [PATCH] Fix exit non-zero exit codes when running as pid1 (v4 branch) Prior to this we would swallow the exit code and always exit(0). --- cmd/git-sync/main.go | 7 ++--- pkg/pid1/pid1.go | 51 +++++++++++++++++++++------------ pkg/pid1/test/fast-exit/main.go | 8 ++---- pkg/pid1/test/fast-exit/test.sh | 8 ++++++ test_e2e.sh | 27 +++++++++++++++++ 5 files changed, 71 insertions(+), 30 deletions(-) diff --git a/cmd/git-sync/main.go b/cmd/git-sync/main.go index c0e51c875..6e15dfbc3 100644 --- a/cmd/git-sync/main.go +++ b/cmd/git-sync/main.go @@ -269,12 +269,9 @@ func main() { // In case we come up as pid 1, act as init. if os.Getpid() == 1 { fmt.Fprintf(os.Stderr, "INFO: detected pid 1, running init handler\n") - err := pid1.ReRun() + code, err := pid1.ReRun() if err == nil { - os.Exit(0) - } - if exerr, ok := err.(*exec.ExitError); ok { - os.Exit(exerr.ExitCode()) + os.Exit(code) } fmt.Fprintf(os.Stderr, "ERROR: unhandled pid1 error: %v\n", err) os.Exit(127) diff --git a/pkg/pid1/pid1.go b/pkg/pid1/pid1.go index 5421b91cd..fc3abc15a 100644 --- a/pkg/pid1/pid1.go +++ b/pkg/pid1/pid1.go @@ -11,54 +11,67 @@ import ( // ReRun converts the current process into a bare-bones init, runs the current // commandline as a child process, and waits for it to complete. The new child // process shares stdin/stdout/stderr with the parent. When the child exits, -// this will return the same value as exec.Command.Run(). If there is an error -// in reaping children that this can not handle, it will panic. -func ReRun() error { +// this will return the exit code. If this returns an error, the child process +// may not be terminated. +func ReRun() (int, error) { bin, err := os.Readlink("/proc/self/exe") if err != nil { - return err + return 0, err } cmd := exec.Command(bin, os.Args[1:]...) cmd.Stdin = os.Stdin cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr if err := cmd.Start(); err != nil { - return err + return 0, err } - runInit(cmd.Process.Pid) - return nil + return runInit(cmd.Process.Pid) } -// runInit runs a bare-bones init process. This will return when firstborn -// exits. In case of truly unknown errors it will panic. -func runInit(firstborn int) { +// runInit runs a bare-bones init process. When firstborn exits, this will +// return the exit code. If this returns an error, the child process may not +// be terminated. +func runInit(firstborn int) (int, error) { sigs := make(chan os.Signal, 8) signal.Notify(sigs) for sig := range sigs { if sig != syscall.SIGCHLD { // Pass it on to the real process. - syscall.Kill(firstborn, sig.(syscall.Signal)) + if err := syscall.Kill(firstborn, sig.(syscall.Signal)); err != nil { + return 0, err + } } // Always try to reap a child - empirically, sometimes this gets missed. - if sigchld(firstborn) { - return + die, status, err := sigchld(firstborn) + if err != nil { + return 0, err + } + if die { + if status.Signaled() { + return 128 + int(status.Signal()), nil + } + if status.Exited() { + return status.ExitStatus(), nil + } + return 0, fmt.Errorf("unhandled exit status: 0x%x\n", status) } } + return 0, fmt.Errorf("signal handler terminated unexpectedly") } -// sigchld handles a SIGCHLD. This will return true when firstborn exits. In -// case of truly unknown errors it will panic. -func sigchld(firstborn int) bool { +// sigchld handles a SIGCHLD. This will return true only when firstborn exits. +// For any other process this will return false and a 0 status. +func sigchld(firstborn int) (bool, syscall.WaitStatus, error) { // Loop to handle multiple child processes. for { var status syscall.WaitStatus pid, err := syscall.Wait4(-1, &status, syscall.WNOHANG, nil) if err != nil { - panic(fmt.Sprintf("failed to wait4(): %v\n", err)) + return false, 0, fmt.Errorf("wait4(): %v\n", err) } if pid == firstborn { - return true + return true, status, nil } if pid <= 0 { // No more children to reap. @@ -66,5 +79,5 @@ func sigchld(firstborn int) bool { } // Must have found one, see if there are more. } - return false + return false, 0, nil } diff --git a/pkg/pid1/test/fast-exit/main.go b/pkg/pid1/test/fast-exit/main.go index 3f51a81d6..25f9c6b4d 100644 --- a/pkg/pid1/test/fast-exit/main.go +++ b/pkg/pid1/test/fast-exit/main.go @@ -4,7 +4,6 @@ package main import ( "fmt" "os" - "os/exec" "k8s.io/git-sync/pkg/pid1" ) @@ -13,12 +12,9 @@ func main() { // In case we come up as pid 1, act as init. if os.Getpid() == 1 { fmt.Printf("detected pid 1, running as init\n") - err := pid1.ReRun() + code, err := pid1.ReRun() if err == nil { - os.Exit(0) - } - if exerr, ok := err.(*exec.ExitError); ok { - os.Exit(exerr.ExitCode()) + os.Exit(code) } fmt.Printf("unhandled pid1 error: %v\n", err) os.Exit(127) diff --git a/pkg/pid1/test/fast-exit/test.sh b/pkg/pid1/test/fast-exit/test.sh index a86630e00..8d06d1f21 100755 --- a/pkg/pid1/test/fast-exit/test.sh +++ b/pkg/pid1/test/fast-exit/test.sh @@ -3,6 +3,14 @@ go build docker build -t example.com/fast-exit . +# This should exit(42) +docker run -ti --rm example.com/fast-exit +RET=$? +if [ "$RET" != 42 ]; then + echo "FAIL: exit code was not preserved: $RET" + exit 1 +fi + # In the past we have observed hangs and missed signals. This *should* run # forever. while true; do diff --git a/test_e2e.sh b/test_e2e.sh index 4ce211cf0..d55037a31 100755 --- a/test_e2e.sh +++ b/test_e2e.sh @@ -254,6 +254,33 @@ assert_file_eq "$ROOT"/link/file "$TESTCASE" # Wrap up pass +############################################## +# Test non-zero exit +############################################## +testcase "non-zero-exit" +echo "$TESTCASE" > "$REPO"/file +git -C "$REPO" commit -qam "$TESTCASE" +ln -s "$ROOT" "$DIR/rootlink" # symlink to test +( + set +o errexit + GIT_SYNC \ + --one-time \ + --repo="file://$REPO" \ + --branch=e2e-branch \ + --rev=does-not-exit \ + --root="$DIR/rootlink" \ + --link="link" \ + > "$DIR"/log."$TESTCASE" 2>&1 + RET=$? + if [[ "$RET" != 1 ]]; then + fail "expected exit code 1, got $RET" + fi + assert_file_absent "$ROOT"/link + assert_file_absent "$ROOT"/link/file +) +# Wrap up +pass + ############################################## # Test default syncing (master) ##############################################