Skip to content

Commit

Permalink
Refactor log and reaper exec to omit MAINPID
Browse files Browse the repository at this point in the history
Using MAINPID breaks systemd's exit detection, as it stops watching the
original pid, but is unable to watch the new pid as it is not a child
of systemd itself. The best we can do is just notify when execing the child
process.

We also need to consolidate forking into a sigle place so that we don't
end up with multiple levels of child processes if both redirecting log
output and reaping child processes.

Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
(cherry picked from commit dc18ef2)
  • Loading branch information
brandond committed Oct 20, 2021
1 parent 3988ede commit 971854c
Show file tree
Hide file tree
Showing 7 changed files with 126 additions and 105 deletions.
7 changes: 4 additions & 3 deletions pkg/cli/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,13 @@ func Run(ctx *cli.Context) error {
// database credentials or other secrets.
gspt.SetProcTitle(os.Args[0] + " agent")

// Do init stuff if pid 1.
// This must be done before InitLogging as that may reexec in order to capture log output
if err := cmds.HandleInit(); err != nil {
// Evacuate cgroup v2 before doing anything else that may fork.
if err := cmds.EvacuateCgroup2(); err != nil {
return err
}

// Initialize logging, and subprocess reaping if necessary.
// Log output redirection and subprocess reaping both require forking.
if err := cmds.InitLogging(); err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cli/cmds/init_default.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@

package cmds

func HandleInit() error {
func EvacuateCgroup2() error {
return nil
}
70 changes: 4 additions & 66 deletions pkg/cli/cmds/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,83 +4,21 @@ package cmds

import (
"os"
"os/signal"
"syscall"

"github.com/containerd/containerd/pkg/userns"
"github.com/erikdubbelboer/gspt"
"github.com/pkg/errors"
"github.com/rancher/k3s/pkg/version"
"github.com/rootless-containers/rootlesskit/pkg/parent/cgrouputil"
)

// HandleInit takes care of things that need to be done when running as process 1, usually in a
// Docker container. This includes evacuating the root cgroup and reaping child pids.
func HandleInit() error {
if os.Getpid() != 1 {
return nil
}

if !userns.RunningInUserNS() {
// EvacuateCgroup2 will handle evacuating the root cgroup in order to enable subtree_control,
// if running as pid 1 without rootless support.
func EvacuateCgroup2() error {
if os.Getpid() == 1 && !userns.RunningInUserNS() {
// The root cgroup has to be empty to enable subtree_control, so evacuate it by placing
// ourselves in the init cgroup.
if err := cgrouputil.EvacuateCgroup2("init"); err != nil {
return errors.Wrap(err, "failed to evacuate root cgroup")
}
}

pwd, err := os.Getwd()
if err != nil {
return errors.Wrap(err, "failed to get working directory for init process")
}

go reapChildren()

// fork the main process to do work so that this init process can handle reaping pids
// without interfering with any other exec's that the rest of the codebase may do.
var wstatus syscall.WaitStatus
pattrs := &syscall.ProcAttr{
Dir: pwd,
Env: os.Environ(),
Sys: &syscall.SysProcAttr{Setsid: true},
Files: []uintptr{
uintptr(syscall.Stdin),
uintptr(syscall.Stdout),
uintptr(syscall.Stderr),
},
}
pid, err := syscall.ForkExec(os.Args[0], os.Args, pattrs)
if err != nil {
return errors.Wrap(err, "failed to fork/exec "+version.Program)
}

gspt.SetProcTitle(os.Args[0] + " init")
// wait for main process to exit, and return its status when it does
_, err = syscall.Wait4(pid, &wstatus, 0, nil)
for err == syscall.EINTR {
_, err = syscall.Wait4(pid, &wstatus, 0, nil)
}
os.Exit(wstatus.ExitStatus())
return nil
}

//reapChildren calls Wait4 whenever SIGCHLD is received
func reapChildren() {
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGCHLD)
for {
select {
case <-sigs:
}
for {
var wstatus syscall.WaitStatus
_, err := syscall.Wait4(-1, &wstatus, 0, nil)
for err == syscall.EINTR {
_, err = syscall.Wait4(-1, &wstatus, 0, nil)
}
if err == nil || err == syscall.ECHILD {
break
}
}
}
}
34 changes: 2 additions & 32 deletions pkg/cli/cmds/log.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,10 @@ package cmds
import (
"flag"
"fmt"
"io"
"os"
"strconv"
"sync"
"time"

"github.com/docker/docker/pkg/reexec"
"github.com/natefinch/lumberjack"
"github.com/rancher/k3s/pkg/version"
"github.com/urfave/cli"
)

Expand Down Expand Up @@ -52,8 +47,8 @@ var (
func InitLogging() error {
var rErr error
logSetupOnce.Do(func() {
if LogConfig.LogFile != "" && os.Getenv("_K3S_LOG_REEXEC_") == "" {
rErr = runWithLogging()
if err := forkIfLoggingOrReaping(); err != nil {
rErr = err
return
}

Expand All @@ -76,31 +71,6 @@ func checkUnixTimestamp() error {
return nil
}

func runWithLogging() error {
var (
l io.Writer
)
l = &lumberjack.Logger{
Filename: LogConfig.LogFile,
MaxSize: 50,
MaxBackups: 3,
MaxAge: 28,
Compress: true,
}
if LogConfig.AlsoLogToStderr {
l = io.MultiWriter(l, os.Stderr)
}

args := append([]string{version.Program}, os.Args[1:]...)
cmd := reexec.Command(args...)
cmd.Env = os.Environ()
cmd.Env = append(cmd.Env, "_K3S_LOG_REEXEC_=true")
cmd.Stderr = l
cmd.Stdout = l
cmd.Stdin = os.Stdin
return cmd.Run()
}

func setupLogging() {
flag.Set("v", strconv.Itoa(LogConfig.VLevel))
flag.Set("vmodule", LogConfig.VModule)
Expand Down
7 changes: 7 additions & 0 deletions pkg/cli/cmds/log_default.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// +build !linux !cgo

package cmds

func forkIfLoggingOrReaping() error {
return nil
}
104 changes: 104 additions & 0 deletions pkg/cli/cmds/log_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// +build linux,cgo

package cmds

import (
"io"
"os"
"os/exec"
"os/signal"
"syscall"

systemd "github.com/coreos/go-systemd/daemon"
"github.com/erikdubbelboer/gspt"
"github.com/natefinch/lumberjack"
"github.com/pkg/errors"
"github.com/rancher/k3s/pkg/version"
"golang.org/x/sys/unix"
)

// forkIfLoggingOrReaping handles forking off the actual k3s process if it is necessary to
// capture log output, or reap child processes. Reaping is only necessary when running
// as pid 1.
func forkIfLoggingOrReaping() error {
var stdout, stderr io.Writer = os.Stdout, os.Stderr
enableLogRedirect := LogConfig.LogFile != "" && os.Getenv("_K3S_LOG_REEXEC_") == ""
enableReaping := os.Getpid() == 1

if enableLogRedirect {
var l io.Writer = &lumberjack.Logger{
Filename: LogConfig.LogFile,
MaxSize: 50,
MaxBackups: 3,
MaxAge: 28,
Compress: true,
}
if LogConfig.AlsoLogToStderr {
l = io.MultiWriter(l, os.Stderr)
}
stdout = l
stderr = l
}

if enableLogRedirect || enableReaping {
gspt.SetProcTitle(os.Args[0] + " init")

pwd, err := os.Getwd()
if err != nil {
return errors.Wrap(err, "failed to get working directory")
}

if enableReaping {
// If we're running as pid 1 we need to reap child processes or defunct containerd-shim
// child processes will accumulate.
unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(1), 0, 0, 0)
go reapChildren()
}

args := append([]string{version.Program}, os.Args[1:]...)
env := append(os.Environ(), "_K3S_LOG_REEXEC_=true", "NOTIFY_SOCKET=")
cmd := &exec.Cmd{
Path: os.Args[0],
Dir: pwd,
Args: args,
Env: env,
Stdin: os.Stdin,
Stdout: stdout,
Stderr: stderr,
SysProcAttr: &syscall.SysProcAttr{
Setsid: true,
},
}
if err := cmd.Start(); err != nil {
return err
}

// The child process won't be allowed to notify, so we send one for it as soon as it's started,
// and then wait for it to exit and pass along the exit code.
systemd.SdNotify(true, "READY=1\n")
cmd.Wait()
os.Exit(cmd.ProcessState.ExitCode())
}
return nil
}

//reapChildren calls Wait4 whenever SIGCHLD is received
func reapChildren() {
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGCHLD)
for {
select {
case <-sigs:
}
for {
var wstatus syscall.WaitStatus
_, err := syscall.Wait4(-1, &wstatus, 0, nil)
for err == syscall.EINTR {
_, err = syscall.Wait4(-1, &wstatus, 0, nil)
}
if err == nil || err == syscall.ECHILD {
break
}
}
}
}
7 changes: 4 additions & 3 deletions pkg/cli/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,13 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont
// database credentials or other secrets.
gspt.SetProcTitle(os.Args[0] + " server")

// Do init stuff if pid 1.
// This must be done before InitLogging as that may reexec in order to capture log output
if err := cmds.HandleInit(); err != nil {
// Evacuate cgroup v2 before doing anything else that may fork.
if err := cmds.EvacuateCgroup2(); err != nil {
return err
}

// Initialize logging, and subprocess reaping if necessary.
// Log output redirection and subprocess reaping both require forking.
if err := cmds.InitLogging(); err != nil {
return err
}
Expand Down

0 comments on commit 971854c

Please sign in to comment.