Skip to content

Commit

Permalink
wait: fix handling of multiple conditions with exited
Browse files Browse the repository at this point in the history
As it turns on things are not so simple after all...
In podman-py it was reported[1] that waiting might hang, per our docs wait
on multiple conditions should exit once the first one is hit and not all
of them. However because the new wait logic never checked if the context
was cancelled the goroutine kept running until conmon exited and because
we used a waitgroup to wait for all of them to finish it blocked until
that happened.

First we can remove the waitgroup as we only need to wait for one of
them anyway via the channel. While this alone fixes the hang it would
still leak the other goroutine. As there is no way to cancel a goroutine
all the code must check for a cancelled context in the wait loop to no
leak.

Fixes 8a94331 ("libpod: simplify WaitForExit()")
[1] containers/podman-py#425

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
  • Loading branch information
Luap99 committed Sep 17, 2024
1 parent f4a08f4 commit fbed3a0
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 13 deletions.
30 changes: 17 additions & 13 deletions libpod/container_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"io"
"net/http"
"os"
"sync"
"time"

"github.com/containers/common/pkg/resize"
Expand Down Expand Up @@ -596,7 +595,7 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)

// we cannot wait locked as we would hold the lock forever, so we unlock and then lock again
c.lock.Unlock()
err := waitForConmonExit(conmonPID, conmonPidFd, pollInterval)
err := waitForConmonExit(ctx, conmonPID, conmonPidFd, pollInterval)
c.lock.Lock()
if err != nil {
return -1, fmt.Errorf("failed to wait for conmon to exit: %w", err)
Expand All @@ -619,15 +618,24 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
return c.runtime.state.GetContainerExitCode(id)
}

func waitForConmonExit(conmonPID, conmonPidFd int, pollInterval time.Duration) error {
func waitForConmonExit(ctx context.Context, conmonPID, conmonPidFd int, pollInterval time.Duration) error {
if conmonPidFd > -1 {
for {
fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
if _, err := unix.Poll(fds, -1); err != nil {
if n, err := unix.Poll(fds, int(pollInterval.Milliseconds())); err != nil {
if err == unix.EINTR {
continue
}
return err
} else if n == 0 {
// n == 0 means timeout
select {
case <-ctx.Done():
return define.ErrCanceled
default:
// context not done, wait again
continue
}
}
return nil
}
Expand All @@ -640,7 +648,11 @@ func waitForConmonExit(conmonPID, conmonPidFd int, pollInterval time.Duration) e
}
return err
}
time.Sleep(pollInterval)
select {
case <-ctx.Done():
return define.ErrCanceled
case <-time.After(pollInterval):
}
}
return nil
}
Expand Down Expand Up @@ -695,22 +707,15 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
}
}

var wg sync.WaitGroup

if waitForExit {
wg.Add(1)
go func() {
defer wg.Done()

code, err := c.WaitForExit(ctx, waitTimeout)
trySend(code, err)
}()
}

if len(wantedStates) > 0 || len(wantedHealthStates) > 0 {
wg.Add(1)
go func() {
defer wg.Done()
stoppedCount := 0
for {
if len(wantedStates) > 0 {
Expand Down Expand Up @@ -780,7 +785,6 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
case <-ctx.Done():
result = waitResult{-1, define.ErrCanceled}
}
wg.Wait()
return result.code, result.err
}

Expand Down
15 changes: 15 additions & 0 deletions test/e2e/wait_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,4 +108,19 @@ var _ = Describe("Podman wait", func() {
Expect(session).Should(ExitCleanly())
Expect(session.OutputToStringArray()).To(Equal([]string{"0", "0", "0"}))
})

It("podman wait on multiple conditions", func() {
session := podmanTest.Podman([]string{"run", "-d", ALPINE, "sleep", "100"})
session.Wait(20)
Expect(session).Should(ExitCleanly())
cid := session.OutputToString()

// condition should return once nay of the condition is met not all of them,
// as the container is running this should return immediately
// https://github.com/containers/podman-py/issues/425
session = podmanTest.Podman([]string{"wait", "--condition", "running,exited", cid})
session.Wait(20)
Expect(session).Should(ExitCleanly())
Expect(session.OutputToString()).To(Equal("-1"))
})
})

0 comments on commit fbed3a0

Please sign in to comment.