From 971854c15b9723387694f021b92d29d4f29f5ad1 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Fri, 8 Oct 2021 12:47:20 -0700 Subject: [PATCH] Refactor log and reaper exec to omit MAINPID Using MAINPID breaks systemd's exit detection, as it stops watching the original pid, but is unable to watch the new pid as it is not a child of systemd itself. The best we can do is just notify when execing the child process. We also need to consolidate forking into a sigle place so that we don't end up with multiple levels of child processes if both redirecting log output and reaping child processes. Signed-off-by: Brad Davidson (cherry picked from commit dc18ef2e51e271d76131b4d187986fd2c42d401f) --- pkg/cli/agent/agent.go | 7 ++- pkg/cli/cmds/init_default.go | 2 +- pkg/cli/cmds/init_linux.go | 70 ++--------------------- pkg/cli/cmds/log.go | 34 +----------- pkg/cli/cmds/log_default.go | 7 +++ pkg/cli/cmds/log_linux.go | 104 +++++++++++++++++++++++++++++++++++ pkg/cli/server/server.go | 7 ++- 7 files changed, 126 insertions(+), 105 deletions(-) create mode 100644 pkg/cli/cmds/log_default.go create mode 100644 pkg/cli/cmds/log_linux.go diff --git a/pkg/cli/agent/agent.go b/pkg/cli/agent/agent.go index 1768e10b83fa..608209ffd8e6 100644 --- a/pkg/cli/agent/agent.go +++ b/pkg/cli/agent/agent.go @@ -23,12 +23,13 @@ func Run(ctx *cli.Context) error { // database credentials or other secrets. gspt.SetProcTitle(os.Args[0] + " agent") - // Do init stuff if pid 1. - // This must be done before InitLogging as that may reexec in order to capture log output - if err := cmds.HandleInit(); err != nil { + // Evacuate cgroup v2 before doing anything else that may fork. + if err := cmds.EvacuateCgroup2(); err != nil { return err } + // Initialize logging, and subprocess reaping if necessary. + // Log output redirection and subprocess reaping both require forking. if err := cmds.InitLogging(); err != nil { return err } diff --git a/pkg/cli/cmds/init_default.go b/pkg/cli/cmds/init_default.go index d86906ac6c44..12da597a5fa9 100644 --- a/pkg/cli/cmds/init_default.go +++ b/pkg/cli/cmds/init_default.go @@ -2,6 +2,6 @@ package cmds -func HandleInit() error { +func EvacuateCgroup2() error { return nil } diff --git a/pkg/cli/cmds/init_linux.go b/pkg/cli/cmds/init_linux.go index ce680282f457..425888124066 100644 --- a/pkg/cli/cmds/init_linux.go +++ b/pkg/cli/cmds/init_linux.go @@ -4,83 +4,21 @@ package cmds import ( "os" - "os/signal" - "syscall" "github.com/containerd/containerd/pkg/userns" - "github.com/erikdubbelboer/gspt" "github.com/pkg/errors" - "github.com/rancher/k3s/pkg/version" "github.com/rootless-containers/rootlesskit/pkg/parent/cgrouputil" ) -// HandleInit takes care of things that need to be done when running as process 1, usually in a -// Docker container. This includes evacuating the root cgroup and reaping child pids. -func HandleInit() error { - if os.Getpid() != 1 { - return nil - } - - if !userns.RunningInUserNS() { +// EvacuateCgroup2 will handle evacuating the root cgroup in order to enable subtree_control, +// if running as pid 1 without rootless support. +func EvacuateCgroup2() error { + if os.Getpid() == 1 && !userns.RunningInUserNS() { // The root cgroup has to be empty to enable subtree_control, so evacuate it by placing // ourselves in the init cgroup. if err := cgrouputil.EvacuateCgroup2("init"); err != nil { return errors.Wrap(err, "failed to evacuate root cgroup") } } - - pwd, err := os.Getwd() - if err != nil { - return errors.Wrap(err, "failed to get working directory for init process") - } - - go reapChildren() - - // fork the main process to do work so that this init process can handle reaping pids - // without interfering with any other exec's that the rest of the codebase may do. - var wstatus syscall.WaitStatus - pattrs := &syscall.ProcAttr{ - Dir: pwd, - Env: os.Environ(), - Sys: &syscall.SysProcAttr{Setsid: true}, - Files: []uintptr{ - uintptr(syscall.Stdin), - uintptr(syscall.Stdout), - uintptr(syscall.Stderr), - }, - } - pid, err := syscall.ForkExec(os.Args[0], os.Args, pattrs) - if err != nil { - return errors.Wrap(err, "failed to fork/exec "+version.Program) - } - - gspt.SetProcTitle(os.Args[0] + " init") - // wait for main process to exit, and return its status when it does - _, err = syscall.Wait4(pid, &wstatus, 0, nil) - for err == syscall.EINTR { - _, err = syscall.Wait4(pid, &wstatus, 0, nil) - } - os.Exit(wstatus.ExitStatus()) return nil } - -//reapChildren calls Wait4 whenever SIGCHLD is received -func reapChildren() { - sigs := make(chan os.Signal, 1) - signal.Notify(sigs, syscall.SIGCHLD) - for { - select { - case <-sigs: - } - for { - var wstatus syscall.WaitStatus - _, err := syscall.Wait4(-1, &wstatus, 0, nil) - for err == syscall.EINTR { - _, err = syscall.Wait4(-1, &wstatus, 0, nil) - } - if err == nil || err == syscall.ECHILD { - break - } - } - } -} diff --git a/pkg/cli/cmds/log.go b/pkg/cli/cmds/log.go index 44f6bd8d01f1..3f896fcc1153 100644 --- a/pkg/cli/cmds/log.go +++ b/pkg/cli/cmds/log.go @@ -3,15 +3,10 @@ package cmds import ( "flag" "fmt" - "io" - "os" "strconv" "sync" "time" - "github.com/docker/docker/pkg/reexec" - "github.com/natefinch/lumberjack" - "github.com/rancher/k3s/pkg/version" "github.com/urfave/cli" ) @@ -52,8 +47,8 @@ var ( func InitLogging() error { var rErr error logSetupOnce.Do(func() { - if LogConfig.LogFile != "" && os.Getenv("_K3S_LOG_REEXEC_") == "" { - rErr = runWithLogging() + if err := forkIfLoggingOrReaping(); err != nil { + rErr = err return } @@ -76,31 +71,6 @@ func checkUnixTimestamp() error { return nil } -func runWithLogging() error { - var ( - l io.Writer - ) - l = &lumberjack.Logger{ - Filename: LogConfig.LogFile, - MaxSize: 50, - MaxBackups: 3, - MaxAge: 28, - Compress: true, - } - if LogConfig.AlsoLogToStderr { - l = io.MultiWriter(l, os.Stderr) - } - - args := append([]string{version.Program}, os.Args[1:]...) - cmd := reexec.Command(args...) - cmd.Env = os.Environ() - cmd.Env = append(cmd.Env, "_K3S_LOG_REEXEC_=true") - cmd.Stderr = l - cmd.Stdout = l - cmd.Stdin = os.Stdin - return cmd.Run() -} - func setupLogging() { flag.Set("v", strconv.Itoa(LogConfig.VLevel)) flag.Set("vmodule", LogConfig.VModule) diff --git a/pkg/cli/cmds/log_default.go b/pkg/cli/cmds/log_default.go new file mode 100644 index 000000000000..b056c2cfedd9 --- /dev/null +++ b/pkg/cli/cmds/log_default.go @@ -0,0 +1,7 @@ +// +build !linux !cgo + +package cmds + +func forkIfLoggingOrReaping() error { + return nil +} diff --git a/pkg/cli/cmds/log_linux.go b/pkg/cli/cmds/log_linux.go new file mode 100644 index 000000000000..4a95cb012de7 --- /dev/null +++ b/pkg/cli/cmds/log_linux.go @@ -0,0 +1,104 @@ +// +build linux,cgo + +package cmds + +import ( + "io" + "os" + "os/exec" + "os/signal" + "syscall" + + systemd "github.com/coreos/go-systemd/daemon" + "github.com/erikdubbelboer/gspt" + "github.com/natefinch/lumberjack" + "github.com/pkg/errors" + "github.com/rancher/k3s/pkg/version" + "golang.org/x/sys/unix" +) + +// forkIfLoggingOrReaping handles forking off the actual k3s process if it is necessary to +// capture log output, or reap child processes. Reaping is only necessary when running +// as pid 1. +func forkIfLoggingOrReaping() error { + var stdout, stderr io.Writer = os.Stdout, os.Stderr + enableLogRedirect := LogConfig.LogFile != "" && os.Getenv("_K3S_LOG_REEXEC_") == "" + enableReaping := os.Getpid() == 1 + + if enableLogRedirect { + var l io.Writer = &lumberjack.Logger{ + Filename: LogConfig.LogFile, + MaxSize: 50, + MaxBackups: 3, + MaxAge: 28, + Compress: true, + } + if LogConfig.AlsoLogToStderr { + l = io.MultiWriter(l, os.Stderr) + } + stdout = l + stderr = l + } + + if enableLogRedirect || enableReaping { + gspt.SetProcTitle(os.Args[0] + " init") + + pwd, err := os.Getwd() + if err != nil { + return errors.Wrap(err, "failed to get working directory") + } + + if enableReaping { + // If we're running as pid 1 we need to reap child processes or defunct containerd-shim + // child processes will accumulate. + unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(1), 0, 0, 0) + go reapChildren() + } + + args := append([]string{version.Program}, os.Args[1:]...) + env := append(os.Environ(), "_K3S_LOG_REEXEC_=true", "NOTIFY_SOCKET=") + cmd := &exec.Cmd{ + Path: os.Args[0], + Dir: pwd, + Args: args, + Env: env, + Stdin: os.Stdin, + Stdout: stdout, + Stderr: stderr, + SysProcAttr: &syscall.SysProcAttr{ + Setsid: true, + }, + } + if err := cmd.Start(); err != nil { + return err + } + + // The child process won't be allowed to notify, so we send one for it as soon as it's started, + // and then wait for it to exit and pass along the exit code. + systemd.SdNotify(true, "READY=1\n") + cmd.Wait() + os.Exit(cmd.ProcessState.ExitCode()) + } + return nil +} + +//reapChildren calls Wait4 whenever SIGCHLD is received +func reapChildren() { + sigs := make(chan os.Signal, 1) + signal.Notify(sigs, syscall.SIGCHLD) + for { + select { + case <-sigs: + } + for { + var wstatus syscall.WaitStatus + _, err := syscall.Wait4(-1, &wstatus, 0, nil) + for err == syscall.EINTR { + _, err = syscall.Wait4(-1, &wstatus, 0, nil) + } + if err == nil || err == syscall.ECHILD { + break + } + } + } +} diff --git a/pkg/cli/server/server.go b/pkg/cli/server/server.go index 78c5b58b19e5..77be5c2fb121 100644 --- a/pkg/cli/server/server.go +++ b/pkg/cli/server/server.go @@ -55,12 +55,13 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont // database credentials or other secrets. gspt.SetProcTitle(os.Args[0] + " server") - // Do init stuff if pid 1. - // This must be done before InitLogging as that may reexec in order to capture log output - if err := cmds.HandleInit(); err != nil { + // Evacuate cgroup v2 before doing anything else that may fork. + if err := cmds.EvacuateCgroup2(); err != nil { return err } + // Initialize logging, and subprocess reaping if necessary. + // Log output redirection and subprocess reaping both require forking. if err := cmds.InitLogging(); err != nil { return err }