fix: don't panic when the logs endpoint waits for more than 5 seconds
This removes the panic when the logs endpoint takes more than 5 seconds to respond.
The panic happened at least with Podman when no new logs appeared while using the follow and since parameters.

We keep retrying until the context is canceled
(the retry request would fail anyway with a canceled context)
or the producer is stopped,
whichever comes first.
This makes the retry behavior consistent with how closed
connections are handled.
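
In isolation, the retry pattern looks roughly like the sketch below. This is a simplified illustration, not the actual change; fetchLogs and consume are hypothetical stand-ins for the real ContainerLogs call and the consumer loop in docker.go.

package sketch

import (
	"context"
	"io"
	"time"
)

// produceLogs keeps retrying fetchLogs until it succeeds or ctx is canceled.
// fetchLogs and consume are hypothetical stand-ins, not real testcontainers APIs.
func produceLogs(ctx context.Context, fetchLogs func(context.Context) (io.ReadCloser, error), consume func(io.ReadCloser)) {
	for {
		r, err := fetchLogs(ctx)
		if err != nil {
			if ctx.Err() != nil {
				// Context canceled: the producer was stopped, give up.
				return
			}
			// Transient failure: retry in one second.
			time.Sleep(1 * time.Second)
			continue
		}
		consume(r)
		return
	}
}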

Outstanding HTTP calls for fetching logs are now interrupted when
a producer is stopped.
Previously the consumer and StopLogProducer() waited for the HTTP call
to complete.
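
A minimal sketch of that wiring, using a plain net/http request in place of the Docker client call; the producer type, method names, and url parameter here are illustrative only, not the real testcontainers types.

package sketch

import (
	"context"
	"io"
	"net/http"
)

// producer is a simplified, hypothetical stand-in for DockerContainer: instead
// of a stop channel it keeps the cancel function of the context the log
// request runs under.
type producer struct {
	stop context.CancelFunc
}

func (p *producer) startLogProducer(ctx context.Context, url string) {
	ctx, p.stop = context.WithCancel(ctx)
	go func() {
		// The request is bound to ctx, so canceling ctx aborts an in-flight
		// call instead of waiting for the server to respond.
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
		if err != nil {
			return
		}
		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			return
		}
		defer resp.Body.Close()
		_, _ = io.Copy(io.Discard, resp.Body) // stand-in for forwarding logs to consumers
	}()
}

func (p *producer) stopLogProducer() {
	if p.stop != nil {
		p.stop() // cancel the context; any outstanding HTTP call returns immediately
		p.stop = nil
	}
}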

This should fix testcontainers#946
martin-sucha committed Jun 8, 2023
1 parent ada9d42 commit 075b0f3
Showing 1 changed file with 15 additions and 12 deletions.
27 changes: 15 additions & 12 deletions docker.go
@@ -69,7 +69,7 @@ type DockerContainer struct {
 	terminationSignal chan bool
 	consumers []LogConsumer
 	raw *types.ContainerJSON
-	stopProducer chan bool
+	stopProducer context.CancelFunc
 	logger Logging
 	lifecycleHooks []ContainerLifecycleHooks
 }
@@ -632,9 +632,9 @@ func (c *DockerContainer) StartLogProducer(ctx context.Context) error {
 		return errors.New("log producer already started")
 	}
 
-	c.stopProducer = make(chan bool)
+	ctx, c.stopProducer = context.WithCancel(ctx)
 
-	go func(stop <-chan bool) {
+	go func() {
 		since := ""
 		// if the socket is closed we will make additional logs request with updated Since timestamp
 	BEGIN:
@@ -645,20 +645,22 @@ func (c *DockerContainer) StartLogProducer(ctx context.Context) error {
 			Since: since,
 		}
 
-		ctx, cancel := context.WithTimeout(ctx, time.Second*5)
-		defer cancel()
-
 		r, err := c.provider.client.ContainerLogs(ctx, c.GetContainerID(), options)
 		if err != nil {
-			// if we can't get the logs, panic, we can't return an error to anything
-			// from within this goroutine
-			panic(err)
+			// if we can't get the logs, retry in one second.
+			c.logger.Printf("cannot get logs for container %q: %v", c.ID, err)
+			if ctx.Err() != nil {
+				// context done.
+				return
+			}
+			time.Sleep(1 * time.Second)
+			goto BEGIN
 		}
 		defer c.provider.Close()
 
 		for {
 			select {
-			case <-stop:
+			case <-ctx.Done():
 				err := r.Close()
 				if err != nil {
 					// we can't close the read closer, this should never happen
@@ -719,7 +721,7 @@ func (c *DockerContainer) StartLogProducer(ctx context.Context) error {
 				}
 			}
 		}
-	}(c.stopProducer)
+	}()
 
 	return nil
 }
@@ -728,7 +730,8 @@ func (c *DockerContainer) StartLogProducer(ctx context.Context) error {
 // and sending them to each added LogConsumer
 func (c *DockerContainer) StopLogProducer() error {
 	if c.stopProducer != nil {
-		c.stopProducer <- true
+		// Cancel the producer's context.
+		c.stopProducer()
 		c.stopProducer = nil
 	}
 	return nil