Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

libpod: wait for healthy on main thread #22658

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions libpod/container_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,10 @@ func (c *Container) Start(ctx context.Context, recursive bool) (finalErr error)
}

// Start the container
return c.start(ctx)
if err := c.start(); err != nil {
return err
}
return c.waitForHealthy(ctx)
}

// Update updates the given container.
Expand Down Expand Up @@ -194,6 +197,9 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
opts.Start = true
opts.Started = startedChan

// attach and start the container on a different thread. waitForHealthy must
// be done later, as it requires to run on the same thread that holds the lock
// for the container.
if err := c.ociRuntime.Attach(c, opts); err != nil {
attachChan <- err
}
Expand All @@ -207,7 +213,7 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
c.newContainerEvent(events.Attach)
}

return attachChan, nil
return attachChan, c.waitForHealthy(ctx)
}

// RestartWithTimeout restarts a running container and takes a given timeout in uint
Expand Down Expand Up @@ -775,6 +781,14 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
}
}
if len(wantedHealthStates) > 0 {
// even if we are interested only in the health check
// check that the container is still running to avoid
// waiting until the timeout expires.
state, err := c.State()
if err != nil {
trySend(-1, err)
return
}
status, err := c.HealthCheckStatus()
if err != nil {
trySend(-1, err)
Expand All @@ -784,6 +798,10 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
trySend(-1, nil)
return
}
if state != define.ContainerStateCreated && state != define.ContainerStateRunning && state != define.ContainerStatePaused {
trySend(-1, define.ErrCtrStopped)
return
}
}
select {
case <-ctx.Done():
Expand Down
32 changes: 23 additions & 9 deletions libpod/container_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,10 +330,10 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err
return false, err
}
}
if err := c.start(ctx); err != nil {
if err := c.start(); err != nil {
return false, err
}
return true, nil
return true, c.waitForHealthy(ctx)
}

// Ensure that the container is in a specific state or state.
Expand Down Expand Up @@ -1208,7 +1208,7 @@ func (c *Container) reinit(ctx context.Context, retainRetries bool) error {

// Initialize (if necessary) and start a container
// Performs all necessary steps to start a container that is not running
// Does not lock or check validity
// Does not lock or check validity, requires to run on the same thread that holds the lock for the container.
func (c *Container) initAndStart(ctx context.Context) (retErr error) {
// If we are ContainerStateUnknown, throw an error
if c.state.State == define.ContainerStateUnknown {
Expand Down Expand Up @@ -1253,11 +1253,14 @@ func (c *Container) initAndStart(ctx context.Context) (retErr error) {
}

// Now start the container
return c.start(ctx)
if err := c.start(); err != nil {
return err
}
return c.waitForHealthy(ctx)
}

// Internal, non-locking function to start a container
func (c *Container) start(ctx context.Context) error {
func (c *Container) start() error {
if c.config.Spec.Process != nil {
logrus.Debugf("Starting container %s with command %v", c.ID(), c.config.Spec.Process.Args)
}
Expand Down Expand Up @@ -1298,10 +1301,14 @@ func (c *Container) start(ctx context.Context) error {

c.newContainerEvent(events.Start)

if err := c.save(); err != nil {
return err
}
return c.save()
}

// waitForHealthy, when sdNotifyMode == SdNotifyModeHealthy, waits up to the DefaultWaitInterval
// for the container to get into the healthy state and reports the status to the notify socket.
// The function unlocks the container lock, so it must be called from the same thread that locks
// the container.
func (c *Container) waitForHealthy(ctx context.Context) error {
if c.config.SdNotifyMode != define.SdNotifyModeHealthy {
return nil
}
Expand All @@ -1315,6 +1322,9 @@ func (c *Container) start(ctx context.Context) error {
}

if _, err := c.WaitForConditionWithInterval(ctx, DefaultWaitInterval, define.HealthCheckHealthy); err != nil {
if errors.Is(err, define.ErrNoSuchCtr) {
return nil
}
return err
}

Expand Down Expand Up @@ -1566,6 +1576,7 @@ func (c *Container) unpause() error {
}

// Internal, non-locking function to restart a container
// It requires to run on the same thread that holds the lock.
func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retErr error) {
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopped, define.ContainerStateExited) {
return fmt.Errorf("unable to restart a container in a paused or unknown state: %w", define.ErrCtrStateInvalid)
Expand Down Expand Up @@ -1613,7 +1624,10 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr
return err
}
}
return c.start(ctx)
if err := c.start(); err != nil {
return err
}
return c.waitForHealthy(ctx)
}

// mountStorage sets up the container's root filesystem
Expand Down
4 changes: 2 additions & 2 deletions libpod/oci_conmon_attach_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
package libpod

import (
"context"
"errors"
"fmt"
"io"
Expand Down Expand Up @@ -32,6 +31,7 @@ const (
// Attach to the given container.
// Does not check if state is appropriate.
// started is only required if startContainer is true.
// It does not wait for the container to be healthy, it is the caller responsibility to do so.
func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
passthrough := c.LogDriver() == define.PassthroughLogging || c.LogDriver() == define.PassthroughTTYLogging

Expand Down Expand Up @@ -86,7 +86,7 @@ func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
// If starting was requested, start the container and notify when that's
// done.
if params.Start {
if err := c.start(context.TODO()); err != nil {
if err := c.start(); err != nil {
return err
}
params.Started <- true
Expand Down
14 changes: 8 additions & 6 deletions pkg/domain/infra/abi/containers.go
Original file line number Diff line number Diff line change
Expand Up @@ -995,7 +995,10 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
return reports, fmt.Errorf("unable to start container %s: %w", ctr.ID(), err)
}

exitCode = ic.GetContainerExitCode(ctx, ctr.Container)
exitCode, err2 := ic.ContainerWaitForExitCode(ctx, ctr.Container)
if err2 != nil {
logrus.Errorf("Waiting for container %s: %v", ctr.ID(), err2)
}
reports = append(reports, &entities.ContainerStartReport{
Id: ctr.ID(),
RawInput: ctr.rawInput,
Expand Down Expand Up @@ -1189,7 +1192,7 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta
report.ExitCode = define.ExitCode(err)
return &report, err
}
report.ExitCode = ic.GetContainerExitCode(ctx, ctr)
report.ExitCode, _ = ic.ContainerWaitForExitCode(ctx, ctr)
if opts.Rm && !ctr.ShouldRestart(ctx) {
if err := removeContainer(ctr, false); err != nil {
if errors.Is(err, define.ErrNoSuchCtr) ||
Expand All @@ -1203,14 +1206,13 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta
return &report, nil
}

func (ic *ContainerEngine) GetContainerExitCode(ctx context.Context, ctr *libpod.Container) int {
func (ic *ContainerEngine) ContainerWaitForExitCode(ctx context.Context, ctr *libpod.Container) (int, error) {
exitCode, err := ctr.Wait(ctx)
if err != nil {
logrus.Errorf("Waiting for container %s: %v", ctr.ID(), err)
intExitCode := int(define.ExecErrorCodeNotFound)
return intExitCode
return intExitCode, err
}
return int(exitCode)
return int(exitCode), nil
}

func (ic *ContainerEngine) ContainerLogs(ctx context.Context, namesOrIds []string, options entities.ContainerLogsOptions) error {
Expand Down
9 changes: 9 additions & 0 deletions test/system/260-sdnotify.bats
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,15 @@ READY=1" "Container log after healthcheck run"
is "$output" "running" "make sure container is still running"

run_podman rm -f -t0 $ctr

ctr=$(random_string)
run_podman run --name $ctr \
--health-cmd="touch /terminate" \
--sdnotify=healthy \
$IMAGE sh -c 'while test \! -e /terminate; do sleep 0.1; done; echo finished'
is "$output" "finished" "make sure container exited successfully"
run_podman rm -f -t0 $ctr

_stop_socat
}

Expand Down