Skip to content

Commit

Permalink
StopContainer: return if cleanup process changed state
Browse files Browse the repository at this point in the history
Commit 067442b improved stopping/killing a container by detecting
whether the cleanup process has already fired and changed the state of
the container.  Further improve on that by returning early instead of
trying to wait for the PID to finish.  At that point we know that the
container has exited but the previous PID may have been recycled
already by the kernel.

[NO NEW TESTS NEEDED] - the absence of the two flaking tests recorded
in containers#17142 will tell.

Fixes: containers#17142
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
  • Loading branch information
vrothberg committed Jan 19, 2023
1 parent e0f6710 commit fd42c1d
Showing 1 changed file with 16 additions and 13 deletions.
29 changes: 16 additions & 13 deletions libpod/oci_conmon_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -430,20 +430,23 @@ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool)
// If the timeout was set to 0 or if stopping the container with the
// specified signal did not work, use the big hammer with SIGKILL.
if err := r.KillContainer(ctr, uint(unix.SIGKILL), all); err != nil {
// Ignore the error if KillContainer complains about it already
// being stopped or exited. There's an inherent race with the
// cleanup process (see #16142).
if !(errors.Is(err, define.ErrCtrStateInvalid) && ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited)) {
// If the PID is 0, then the container is already stopped.
if ctr.state.PID == 0 {
return nil
}
// Again, check if the container is gone. If it is, exit cleanly.
if aliveErr := unix.Kill(ctr.state.PID, 0); errors.Is(aliveErr, unix.ESRCH) {
return nil
}
return fmt.Errorf("sending SIGKILL to container %s: %w", ctr.ID(), err)
// There's an inherent race with the cleanup process (see
// #16142, #17142). If the container has already been marked as
// stopped or exited by the cleanup process, we can return
// immediately.
if errors.Is(err, define.ErrCtrStateInvalid) && ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) {
return nil
}

// If the PID is 0, then the container is already stopped.
if ctr.state.PID == 0 {
return nil
}
// Again, check if the container is gone. If it is, exit cleanly.
if aliveErr := unix.Kill(ctr.state.PID, 0); errors.Is(aliveErr, unix.ESRCH) {
return nil
}
return fmt.Errorf("sending SIGKILL to container %s: %w", ctr.ID(), err)
}

// Give runtime a few seconds to make it happen
Expand Down

0 comments on commit fd42c1d

Please sign in to comment.