From fd42c1dcb8d7cc86e6e62c7628a6093573a9ed36 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Thu, 19 Jan 2023 11:03:23 +0100 Subject: [PATCH] StopContainer: return if cleanup process changed state Commit 067442b5701f improved stopping/killing a container by detecting whether the cleanup process has already fired and changed the state of the container. Further improve on that by returning early instead of trying to wait for the PID to finish. At that point we know that the container has exited but the previous PID may have been recycled already by the kernel. [NO NEW TESTS NEEDED] - the absence of the two flaking tests recorded in #17142 will tell. Fixes: #17142 Signed-off-by: Valentin Rothberg --- libpod/oci_conmon_common.go | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/libpod/oci_conmon_common.go b/libpod/oci_conmon_common.go index 23ee52cf8a..c11787ced3 100644 --- a/libpod/oci_conmon_common.go +++ b/libpod/oci_conmon_common.go @@ -430,20 +430,23 @@ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool) // If the timeout was set to 0 or if stopping the container with the // specified signal did not work, use the big hammer with SIGKILL. if err := r.KillContainer(ctr, uint(unix.SIGKILL), all); err != nil { - // Ignore the error if KillContainer complains about it already - // being stopped or exited. There's an inherent race with the - // cleanup process (see #16142). - if !(errors.Is(err, define.ErrCtrStateInvalid) && ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited)) { - // If the PID is 0, then the container is already stopped. - if ctr.state.PID == 0 { - return nil - } - // Again, check if the container is gone. If it is, exit cleanly. - if aliveErr := unix.Kill(ctr.state.PID, 0); errors.Is(aliveErr, unix.ESRCH) { - return nil - } - return fmt.Errorf("sending SIGKILL to container %s: %w", ctr.ID(), err) + // There's an inherent race with the cleanup process (see + // #16142, #17142). If the container has already been marked as + // stopped or exited by the cleanup process, we can return + // immediately. + if errors.Is(err, define.ErrCtrStateInvalid) && ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) { + return nil + } + + // If the PID is 0, then the container is already stopped. + if ctr.state.PID == 0 { + return nil + } + // Again, check if the container is gone. If it is, exit cleanly. + if aliveErr := unix.Kill(ctr.state.PID, 0); errors.Is(aliveErr, unix.ESRCH) { + return nil } + return fmt.Errorf("sending SIGKILL to container %s: %w", ctr.ID(), err) } // Give runtime a few seconds to make it happen