Skip to content

Commit

Permalink
runc kill: fix sending KILL to non-pidns container
Browse files Browse the repository at this point in the history
Commit f8ad20f made it impossible to kill leftover processes in a
stopped container that does not have its own PID namespace. In other
words, if a container init is gone, it is no longer possible to use
`runc kill` to kill the leftover processes.

Fix this by moving the check if container init exists to after the
special case of handling the container without own PID namespace.

While at it, fix the minor issue introduced by commit 9583b3d:
if signalAllProcesses is used, there is no need to thaw the
container (as freeze/thaw is either done in signalAllProcesses already,
or not needed at all).

Also, make signalAllProcesses return an error early if the container
cgroup does not exist (as it relies on it to do its job). This way, the
error message returned is more generic and easier to understand
("container not running" instead of "can't open file").

Finally, add a test case.

Fixes: f8ad20f
Fixes: 9583b3d
Co-authored-by: lifubang <lifubang@acmcoder.com>
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
  • Loading branch information
kolyshkin and lifubang committed Nov 4, 2023
1 parent 094e39f commit 7de61c4
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 22 deletions.
21 changes: 11 additions & 10 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -364,25 +364,26 @@ func (c *Container) start(process *Process) (retErr error) {
func (c *Container) Signal(s os.Signal) error {
c.m.Lock()
defer c.m.Unlock()
// To avoid a PID reuse attack, don't kill non-running container.
if !c.hasInit() {
return ErrNotRunning
}

// When a container has its own PID namespace, inside it the init PID
// is 1, and thus it is handled specially by the kernel. In particular,
// killing init with SIGKILL from an ancestor namespace will also kill
// all other processes in that PID namespace (see pid_namespaces(7)).
//
// OTOH, if PID namespace is shared, we should kill all pids to avoid
// leftover processes.
var err error
// leftover processes. Handle this special case here.
if s == unix.SIGKILL && !c.config.Namespaces.IsPrivate(configs.NEWPID) {
err = signalAllProcesses(c.cgroupManager, unix.SIGKILL)
} else {
err = c.initProcess.signal(s)
if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil {
return fmt.Errorf("unable to kill all processes: %w", err)
}
return nil
}
if err != nil {

// To avoid a PID reuse attack, don't kill non-running container.
if !c.hasInit() {
return ErrNotRunning
}
if err := c.initProcess.signal(s); err != nil {
return fmt.Errorf("unable to signal init: %w", err)
}
if s == unix.SIGKILL {
Expand Down
3 changes: 3 additions & 0 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,9 @@ func setupPersonality(config *configs.Config) error {
// signalAllProcesses freezes then iterates over all the processes inside the
// manager's cgroups sending the signal s to them.
func signalAllProcesses(m cgroups.Manager, s unix.Signal) error {
if !m.Exists() {
return ErrNotRunning
}
// Use cgroup.kill, if available.
if s == unix.SIGKILL {
if p := m.Path(""); p != "" { // Either cgroup v2 or hybrid.
Expand Down
66 changes: 54 additions & 12 deletions tests/integration/kill.bats
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ function teardown() {
teardown_bundle
}

# shellcheck disable=SC2030
@test "kill detached busybox" {
# run busybox detached
runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
Expand All @@ -35,16 +36,15 @@ function teardown() {
[ "$status" -eq 0 ]
}

# This is roughly the same as TestPIDHostInitProcessWait in libcontainer/integration.
@test "kill KILL [host pidns]" {
# kill -a currently requires cgroup freezer.
test_host_pidns_kill() {
requires cgroups_freezer

update_config ' .linux.namespaces -= [{"type": "pid"}]'
set_cgroups_path
if [ $EUID -ne 0 ]; then
# kill --all requires working cgroups.
requires rootless_cgroup
# Can't mount real /proc when rootless + no pidns,
# so change it to a bind-mounted one from the host.
update_config ' .mounts |= map((select(.type == "proc")
| .type = "none"
| .source = "/proc"
Expand All @@ -53,32 +53,74 @@ function teardown() {
fi

runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
# shellcheck disable=SC2031
[ "$status" -eq 0 ]
cgpath=$(get_cgroup_path "pids")
init_pid=$(cat "$cgpath"/cgroup.procs)

# Start a few more processes.
for _ in 1 2 3 4 5; do
__runc exec -d test_busybox sleep 1h
[ "$status" -eq 0 ]
done

if [ -v KILL_INIT ]; then
# Now kill the container's init process. Since the container do
# not have own PID ns, its init is no special and the container
# will still be up and running (except for rootless container
# AND systemd cgroup driver AND systemd > v245, when systemd
# kills the container; see "kill KILL [host pidns + init gone]"
# below).
kill -9 "$init_pid"
fi

# Get the list of all container processes.
path=$(get_cgroup_path "pids")
pids=$(cat "$path"/cgroup.procs)
pids=$(cat "$cgpath"/cgroup.procs)
echo "pids: $pids"
# Sanity check -- make sure all processes exist.
for p in $pids; do
kill -0 "$p"
done

runc kill test_busybox KILL
# shellcheck disable=SC2031
[ "$status" -eq 0 ]
wait_for_container 10 1 test_busybox stopped

# Make sure all processes are gone.
pids=$(cat "$path"/cgroup.procs) || true # OK if cgroup is gone
pids=$(cat "$cgpath"/cgroup.procs) || true # OK if cgroup is gone
echo "pids: $pids"
for p in $pids; do
run ! kill -0 "$p"
[[ "$output" = *"No such process" ]]
done
[ -z "$pids" ]
}

# This is roughly the same as TestPIDHostInitProcessWait in libcontainer/integration.
# The differences are:
#
# 1. Here we use separate processes to create and to kill a container, so the
# processes inside a container are not children of "runc kill".
#
# 2. We hit different codepaths (nonChildProcess.signal rather than initProcess.signal).
@test "kill KILL [host pidns]" {
unset KILL_INIT
test_host_pidns_kill
}

# Same as above plus:
#
# 3. Test runc kill on a container whose init process is gone.
#
# Issue 4047, case "runc kill".
@test "kill KILL [host pidns + init gone]" {
# Apparently, for rootless test, when using systemd cgroup manager,
# newer versions of systemd clean up the container as soon as its init
# process is gone. This is all fine and dandy, except it prevents us to
# test this case, thus we skip the test.
#
# It is not entirely clear which systemd version got this feature:
# v245 works fine, and v249 does not.
if [ $EUID -ne 0 ] && [ -v RUNC_USE_SYSTEMD ] && [ "$(systemd_version)" -gt 245 ]; then
skip "rootless+systemd conflicts with systemd > 245"
fi
KILL_INIT=1
test_host_pidns_kill
unset KILL_INIT
}

0 comments on commit 7de61c4

Please sign in to comment.